Compare commits

...

3 Commits

Author SHA1 Message Date
Christian Schwarz
312c62cf97 add a reasonable assertion 2024-06-13 18:15:33 +02:00
Christian Schwarz
f787eb1d3b test_vm_bit_clear_on_heap_lock_blackbox: dump layer map while flaky code is running
refs https://github.com/neondatabase/neon/issues/6967
2024-06-13 15:20:56 +02:00
Christian Schwarz
942446f312 carry reconstruct data 2024-06-13 12:31:50 +02:00
12 changed files with 159 additions and 23 deletions

View File

@@ -4,6 +4,10 @@ version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
testing = [ "pageserver_api/testing" ]
[dependencies]
anyhow.workspace = true
async-trait.workspace = true

View File

@@ -383,6 +383,12 @@ impl PageServerNode {
.map(|x| x.parse::<AuxFilePolicy>())
.transpose()
.context("Failed to parse 'switch_aux_file_policy'")?,
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: settings
.remove("test_vm_bit_debug_logging")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'test_vm_bit_debug_logging' as bool")?,
};
if !settings.is_empty() {
bail!("Unrecognized tenant settings: {settings:?}")
@@ -506,6 +512,12 @@ impl PageServerNode {
.map(|x| x.parse::<AuxFilePolicy>())
.transpose()
.context("Failed to parse 'switch_aux_file_policy'")?,
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: settings
.remove("test_vm_bit_debug_logging")
.map(|x| x.parse::<bool>())
.transpose()
.context("Failed to parse 'test_vm_bit_debug_logging' as bool")?,
}
};

View File

@@ -4,6 +4,10 @@ version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
testing = []
[dependencies]
serde.workspace = true
serde_with.workspace = true

View File

@@ -322,6 +322,8 @@ pub struct TenantConfig {
pub timeline_get_throttle: Option<ThrottleConfig>,
pub image_layer_creation_check_threshold: Option<u8>,
pub switch_aux_file_policy: Option<AuxFilePolicy>,
#[cfg(feature = "testing")]
pub test_vm_bit_debug_logging: Option<bool>,
}
/// The policy for the aux file storage. It can be switched through `switch_aux_file_policy`

View File

@@ -8,7 +8,7 @@ license.workspace = true
default = []
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
# which adds some runtime cost to run tests on outage conditions
testing = ["fail/failpoints"]
testing = ["fail/failpoints", "pageserver_api/testing"]
[dependencies]
anyhow.workspace = true

View File

@@ -3831,6 +3831,8 @@ pub(crate) mod harness {
tenant_conf.image_layer_creation_check_threshold,
),
switch_aux_file_policy: Some(tenant_conf.switch_aux_file_policy),
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: Some(tenant_conf.test_vm_bit_debug_logging),
}
}
}
@@ -6364,7 +6366,7 @@ mod tests {
.await?;
Ok(res.pop_last().map(|(k, v)| {
assert_eq!(k, key);
v.unwrap()
v.unwrap().0
}))
}

View File

@@ -377,6 +377,9 @@ pub struct TenantConf {
/// There is a `last_aux_file_policy` flag which gets persisted in `index_part.json` once the first aux
/// file is written.
pub switch_aux_file_policy: AuxFilePolicy,
#[cfg(feature = "testing")]
pub test_vm_bit_debug_logging: bool,
}
/// Same as TenantConf, but this struct preserves the information about
@@ -476,6 +479,11 @@ pub struct TenantConfOpt {
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub switch_aux_file_policy: Option<AuxFilePolicy>,
#[cfg(feature = "testing")]
#[serde(skip_serializing_if = "Option::is_none")]
#[serde(default)]
pub test_vm_bit_debug_logging: Option<bool>,
}
impl TenantConfOpt {
@@ -538,6 +546,10 @@ impl TenantConfOpt {
switch_aux_file_policy: self
.switch_aux_file_policy
.unwrap_or(global_conf.switch_aux_file_policy),
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: self
.test_vm_bit_debug_logging
.unwrap_or(global_conf.test_vm_bit_debug_logging),
}
}
}
@@ -582,6 +594,8 @@ impl Default for TenantConf {
timeline_get_throttle: crate::tenant::throttle::Config::disabled(),
image_layer_creation_check_threshold: DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD,
switch_aux_file_policy: AuxFilePolicy::default_tenant_config(),
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: false,
}
}
}
@@ -657,6 +671,8 @@ impl From<TenantConfOpt> for models::TenantConfig {
timeline_get_throttle: value.timeline_get_throttle.map(ThrottleConfig::from),
image_layer_creation_check_threshold: value.image_layer_creation_check_threshold,
switch_aux_file_policy: value.switch_aux_file_policy,
#[cfg(feature = "testing")]
test_vm_bit_debug_logging: value.test_vm_bit_debug_logging,
}
}
}

View File

@@ -73,7 +73,7 @@ where
/// the same ValueReconstructState struct in the next 'get_value_reconstruct_data'
/// call, to collect more records.
///
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ValueReconstructState {
pub records: Vec<(Lsn, NeonWalRecord)>,
pub img: Option<(Lsn, Bytes)>,

View File

@@ -822,6 +822,7 @@ impl DeltaLayerInner {
if entry_lsn < lsn_range.start {
return false;
}
assert!(entry_lsn <= search_key.lsn(), "certain because of how backwards visit direction works");
offsets.push((entry_lsn, blob_ref.pos()));
!blob_ref.will_init()

View File

@@ -910,7 +910,9 @@ impl Timeline {
img: cached_page_img,
};
self.get_impl(key, lsn, reconstruct_state, ctx).await
self.get_impl(key, lsn, reconstruct_state, ctx)
.await
.map(|v| v.0)
}
GetImpl::Vectored => {
let keyspace = KeySpace {
@@ -922,16 +924,59 @@ impl Timeline {
let mut reconstruct_state = ValuesReconstructState::new();
// Only add the cached image to the reconstruct state when it exists.
let cached_page_img_lsn = cached_page_img.as_ref().map(|(lsn, _)| *lsn);
if cached_page_img.is_some() {
let mut key_state = VectoredValueReconstructState::default();
key_state.img = cached_page_img;
reconstruct_state.keys.insert(key, Ok(key_state));
}
let debug_log = {
#[cfg(feature = "testing")]
{
self.get_test_vm_bit_debug_logging()
}
#[cfg(not(feature = "testing"))]
{
false
}
};
if debug_log {
tracing::info!(%key, %lsn, ?cached_page_img_lsn, "debug-logging page reconstruction");
}
if debug_log {
tracing::info!(
location = "before vectored get",
"debug-logging page reconstruction"
);
self.layers
.read()
.await
.layer_map()
.dump(false, ctx)
.await
.unwrap();
}
let vectored_res = self
.get_vectored_impl(keyspace.clone(), lsn, &mut reconstruct_state, ctx)
.await;
if debug_log {
tracing::info!(
location = "before validation",
"debug-logging page reconstruction"
);
self.layers
.read()
.await
.layer_map()
.dump(false, ctx)
.await
.unwrap();
}
if self.conf.validate_vectored_get {
self.validate_get_vectored_impl(&vectored_res, keyspace, lsn, ctx)
.await;
@@ -949,7 +994,7 @@ impl Timeline {
"Singular vectored get returned wrong key"
)))
} else {
value
value.map(|v| v.0)
}
}
None => Err(PageReconstructError::MissingKey(MissingKeyError {
@@ -973,7 +1018,7 @@ impl Timeline {
lsn: Lsn,
mut reconstruct_state: ValueReconstructState,
ctx: &RequestContext,
) -> Result<Bytes, PageReconstructError> {
) -> Result<(Bytes, ValueReconstructState), PageReconstructError> {
// XXX: structured stats collection for layer eviction here.
trace!(
"get page request for {}@{} from task kind {:?}",
@@ -1112,7 +1157,12 @@ impl Timeline {
}
}
res
let Ok(res) = res else {
return Err(res.unwrap_err());
};
Ok(BTreeMap::from_iter(
res.into_iter().map(|(k, v)| (k, v.map(|v| v.0))),
))
}
/// Scan the keyspace and return all existing key-values in the keyspace. This currently uses vectored
@@ -1176,7 +1226,12 @@ impl Timeline {
recording.observe(throttled);
}
vectored_res
let Ok(vectored_res) = vectored_res else {
return Err(vectored_res.unwrap_err());
};
Ok(BTreeMap::from_iter(
vectored_res.into_iter().map(|(k, v)| (k, v.map(|v| v.0))),
))
}
/// Not subject to [`Self::timeline_get_throttle`].
@@ -1185,7 +1240,10 @@ impl Timeline {
keyspace: KeySpace,
lsn: Lsn,
ctx: &RequestContext,
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
) -> Result<
BTreeMap<Key, Result<(Bytes, ValueReconstructState), PageReconstructError>>,
GetVectoredError,
> {
let mut values = BTreeMap::new();
for range in keyspace.ranges {
@@ -1244,7 +1302,10 @@ impl Timeline {
lsn: Lsn,
reconstruct_state: &mut ValuesReconstructState,
ctx: &RequestContext,
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
) -> Result<
BTreeMap<Key, Result<(Bytes, ValueReconstructState), PageReconstructError>>,
GetVectoredError,
> {
let get_kind = if keyspace.total_raw_size() == 1 {
GetKind::Singular
} else {
@@ -1261,7 +1322,10 @@ impl Timeline {
let reconstruct_timer = crate::metrics::RECONSTRUCT_TIME
.for_get_kind(get_kind)
.start_timer();
let mut results: BTreeMap<Key, Result<Bytes, PageReconstructError>> = BTreeMap::new();
let mut results: BTreeMap<
Key,
Result<(Bytes, ValueReconstructState), PageReconstructError>,
> = BTreeMap::new();
let layers_visited = reconstruct_state.get_layers_visited();
for (key, res) in std::mem::take(&mut reconstruct_state.keys) {
@@ -1297,7 +1361,10 @@ impl Timeline {
/// Not subject to [`Self::timeline_get_throttle`].
pub(super) async fn validate_get_vectored_impl(
&self,
vectored_res: &Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError>,
vectored_res: &Result<
BTreeMap<Key, Result<(Bytes, ValueReconstructState), PageReconstructError>>,
GetVectoredError,
>,
keyspace: KeySpace,
lsn: Lsn,
ctx: &RequestContext,
@@ -1370,8 +1437,8 @@ impl Timeline {
key: &Key,
keyspace: &KeySpace,
lsn: Lsn,
seq: &Bytes,
vec: &Bytes,
(seq, seq_reconstruct_state): &(Bytes, ValueReconstructState),
(vec, vec_reconstruct_state): &(Bytes, ValueReconstructState),
) {
if *key == AUX_FILES_KEY {
// The value reconstruct of AUX_FILES_KEY from records is not deterministic
@@ -1394,10 +1461,16 @@ impl Timeline {
}
} else {
// All other keys should reconstruct deterministically, so we simply compare the blobs.
assert_eq!(
seq, vec,
"Image mismatch for key {key} - keyspace={keyspace:?} lsn={lsn}"
);
if seq != vec {
assert_eq!(
seq_reconstruct_state, vec_reconstruct_state,
"Reconstruct state mismatch for key {key} - keyspace={keyspace:?} lsn={lsn}"
);
assert_eq!(
seq, vec,
"Image mismatch for key {key} - keyspace={keyspace:?} lsn={lsn}"
);
}
}
}
@@ -2166,6 +2239,15 @@ impl Timeline {
.unwrap_or(default_tenant_conf.evictions_low_residence_duration_metric_threshold)
}
#[cfg(feature = "testing")]
fn get_test_vm_bit_debug_logging(&self) -> bool {
let tenant_conf = self.tenant_conf.load();
tenant_conf
.tenant_conf
.test_vm_bit_debug_logging
.unwrap_or(self.conf.default_tenant_conf.test_vm_bit_debug_logging)
}
fn get_image_layer_creation_check_threshold(&self) -> u8 {
let tenant_conf = self.tenant_conf.load();
tenant_conf
@@ -2181,6 +2263,10 @@ impl Timeline {
pub(super) fn tenant_conf_updated(&self, new_conf: &TenantConfOpt) {
// NB: Most tenant conf options are read by background loops, so,
// changes will automatically be picked up.
#[cfg(feature = "testing")]
{
info!(?new_conf.test_vm_bit_debug_logging, "updating tenant conf");
}
// The threshold is embedded in the metric. So, we need to update it.
{
@@ -4356,7 +4442,7 @@ impl Timeline {
let mut total_kb_retrieved = 0;
let mut total_keys_retrieved = 0;
for (k, v) in data {
let v = v.map_err(CreateImageLayersError::PageReconstructError)?;
let (v, _) = v.map_err(CreateImageLayersError::PageReconstructError)?;
total_kb_retrieved += KEY_SIZE + v.len();
total_keys_retrieved += 1;
new_data.insert(k, v);
@@ -5151,7 +5237,7 @@ impl Timeline {
key: Key,
request_lsn: Lsn,
mut data: ValueReconstructState,
) -> Result<Bytes, PageReconstructError> {
) -> Result<(Bytes, ValueReconstructState), PageReconstructError> {
// Perform WAL redo if needed
data.records.reverse();
@@ -5164,7 +5250,7 @@ impl Timeline {
img_lsn,
request_lsn,
);
Ok(img.clone())
Ok((img.clone(), data))
} else {
Err(PageReconstructError::from(anyhow!(
"base image for {key} at {request_lsn} not found"
@@ -5196,6 +5282,8 @@ impl Timeline {
let last_rec_lsn = data.records.last().unwrap().0;
let ret_state = data.clone();
let img = match self
.walredo_mgr
.as_ref()
@@ -5226,7 +5314,7 @@ impl Timeline {
}
}
Ok(img)
Ok((img, ret_state))
}
}
}

View File

@@ -1064,7 +1064,7 @@ impl Timeline {
img: base_image,
records: delta_above_base_image,
};
let img = tline.reconstruct_value(key, horizon, state).await?;
let (img, _) = tline.reconstruct_value(key, horizon, state).await?;
Ok((keys_above_horizon, img))
}

View File

@@ -287,6 +287,13 @@ def test_vm_bit_clear_on_heap_lock_blackbox(neon_env_builder: NeonEnvBuilder):
# already truncated away.
#
# ERROR: could not access status of transaction 1027
# Debugging https://github.com/neondatabase/neon/issues/6967
# the select() below fails occassionally at get_impl="vectored" validation
env.pageserver.http_client().patch_tenant_config_client_side(
tenant_id,
{"test_vm_bit_debug_logging": True},
)
cur.execute("select xmin, xmax, * from vmtest_lock where id = 40000 for update")
tup = cur.fetchall()
log.info(f"tuple = {tup}")