# feat(pageserver): add db rel count as feature flag property (#12632)

## Problem

As part of the reldirv2 rollout (LKB-197), we will use the number of dbs/rels as a criterion for whether to roll out reldirv2 directly on the write path (the simplest and easiest rollout path). If the number of dbs/rels is small, the migration shouldn't take long on the write path.

## Summary of changes

* Compute the db/rel count during basebackup.
* Also compute it during logical size computation.
* Collect the maximum db/rel count across all timelines into the feature flag properties.

---------

Signed-off-by: Alex Chi Z <chi@neon.tech>
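A minimal self-contained sketch (not the actual pageserver code) of the core logic the diff below adds: each timeline publishes an optional `(db_count, rel_count)` pair, and the tenant folds a per-component maximum across timelines into its feature-flag properties. The `Timeline` stub and the `max_db_rel_counts` helper are hypothetical names for illustration; note also that the diff's `is_none()` checks plus `map(..)` collapse into a single `map_or` here.

```rust
// Sketch only: `arc_swap` ("1.x") is the crate the real code uses,
// but this `Timeline` stub is a stand-in, not pageserver's struct.
use std::sync::Arc;

use arc_swap::ArcSwapOption;

struct Timeline {
    // Written during basebackup / logical size calculation; read on refresh.
    db_rel_count: ArcSwapOption<(usize, usize)>,
}

fn max_db_rel_counts(timelines: &[Timeline]) -> (Option<usize>, Option<usize>) {
    let mut db_count_max: Option<usize> = None;
    let mut rel_count_max: Option<usize> = None;
    for timeline in timelines {
        if let Some(data) = timeline.db_rel_count.load_full() {
            let (db_count, rel_count) = *data.as_ref();
            // Take the maximum across timelines; the property becomes
            // available as soon as any timeline has reported a count.
            db_count_max = Some(db_count_max.map_or(db_count, |max| max.max(db_count)));
            rel_count_max = Some(rel_count_max.map_or(rel_count, |max| max.max(rel_count)));
        }
    }
    (db_count_max, rel_count_max)
}

fn main() {
    let with_counts = Timeline {
        db_rel_count: ArcSwapOption::from_pointee((3, 100)),
    };
    let not_yet_computed = Timeline {
        db_rel_count: ArcSwapOption::new(None),
    };
    let timelines = [with_counts, not_yet_computed];
    assert_eq!(max_db_rel_counts(&timelines), (Some(3), Some(100)));
}
```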
Basebackup: count databases and relations while the tarball is assembled, then publish the pair on the timeline.

```diff
@@ -11,6 +11,7 @@
 //! from data stored in object storage.
 //!
 use std::fmt::Write as FmtWrite;
+use std::sync::Arc;
 use std::time::{Instant, SystemTime};
 
 use anyhow::{Context, anyhow};
@@ -420,12 +421,16 @@ where
         }
 
         let mut min_restart_lsn: Lsn = Lsn::MAX;
 
+        let mut dbdir_cnt = 0;
+        let mut rel_cnt = 0;
+
         // Create tablespace directories
         for ((spcnode, dbnode), has_relmap_file) in
             self.timeline.list_dbdirs(self.lsn, self.ctx).await?
         {
             self.add_dbdir(spcnode, dbnode, has_relmap_file).await?;
+            dbdir_cnt += 1;
             // If full backup is requested, include all relation files.
             // Otherwise only include init forks of unlogged relations.
             let rels = self
@@ -433,6 +438,7 @@ where
                 .list_rels(spcnode, dbnode, Version::at(self.lsn), self.ctx)
                 .await?;
             for &rel in rels.iter() {
+                rel_cnt += 1;
                 // Send init fork as main fork to provide well formed empty
                 // contents of UNLOGGED relations. Postgres copies it in
                 // `reinit.c` during recovery.
@@ -455,6 +461,10 @@ where
             }
         }
 
+        self.timeline
+            .db_rel_count
+            .store(Some(Arc::new((dbdir_cnt, rel_cnt))));
+
         let start_time = Instant::now();
         let aux_files = self
            .timeline
```
Feature resolver: thread the new counts through `PerTenantProperties` and into the cached PostHog properties.

```diff
@@ -156,6 +156,8 @@ impl FeatureResolver {
 
             let tenant_properties = PerTenantProperties {
                 remote_size_mb: Some(rand::rng().random_range(100.0..1000000.00)),
+                db_count_max: Some(rand::rng().random_range(1..1000)),
+                rel_count_max: Some(rand::rng().random_range(1..1000)),
             }
             .into_posthog_properties();
 
@@ -344,6 +346,8 @@ impl FeatureResolver {
 
 struct PerTenantProperties {
     pub remote_size_mb: Option<f64>,
+    pub db_count_max: Option<usize>,
+    pub rel_count_max: Option<usize>,
 }
 
 impl PerTenantProperties {
@@ -355,6 +359,18 @@ impl PerTenantProperties {
                 PostHogFlagFilterPropertyValue::Number(remote_size_mb),
             );
         }
+        if let Some(db_count) = self.db_count_max {
+            properties.insert(
+                "tenant_db_count_max".to_string(),
+                PostHogFlagFilterPropertyValue::Number(db_count as f64),
+            );
+        }
+        if let Some(rel_count) = self.rel_count_max {
+            properties.insert(
+                "tenant_rel_count_max".to_string(),
+                PostHogFlagFilterPropertyValue::Number(rel_count as f64),
+            );
+        }
         properties
     }
 }
@@ -409,7 +425,11 @@ impl TenantFeatureResolver {
 
     /// Refresh the cached properties and flags on the critical path.
     pub fn refresh_properties_and_flags(&self, tenant_shard: &TenantShard) {
+        // Any of the remote size is none => this property is none.
         let mut remote_size_mb = Some(0.0);
+        // Any of the db or rel count is available => this property is available.
+        let mut db_count_max = None;
+        let mut rel_count_max = None;
         for timeline in tenant_shard.list_timelines() {
             let size = timeline.metrics.resident_physical_size_get();
             if size == 0 {
@@ -419,9 +439,25 @@ impl TenantFeatureResolver {
             if let Some(ref mut remote_size_mb) = remote_size_mb {
                 *remote_size_mb += size as f64 / 1024.0 / 1024.0;
             }
+            if let Some(data) = timeline.db_rel_count.load_full() {
+                let (db_count, rel_count) = *data.as_ref();
+                if db_count_max.is_none() {
+                    db_count_max = Some(db_count);
+                }
+                if rel_count_max.is_none() {
+                    rel_count_max = Some(rel_count);
+                }
+                db_count_max = db_count_max.map(|max| max.max(db_count));
+                rel_count_max = rel_count_max.map(|max| max.max(rel_count));
+            }
         }
         self.cached_tenant_properties.store(Arc::new(
-            PerTenantProperties { remote_size_mb }.into_posthog_properties(),
+            PerTenantProperties {
+                remote_size_mb,
+                db_count_max,
+                rel_count_max,
+            }
+            .into_posthog_properties(),
         ));
 
         // BEGIN: Update the feature flag on the critical path.
```
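Note the asymmetric semantics spelled out by the two comments in `refresh_properties_and_flags` above: `tenant_remote_size_mb` is a sum, so it starts at `Some(0.0)` and, per the "any of the remote size is none" rule, is dropped to `None` as soon as any timeline's size is unknown, since a sum over partial data would under-report the tenant. The db/rel counts are maxima, so they start at `None` and become `Some` once any timeline has published a count: a maximum over partial data is still a usable lower bound for gating the rollout.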
Logical size calculation: compute the same counts while walking the db directory.

```diff
@@ -8,6 +8,7 @@
 //!
 use std::collections::{HashMap, HashSet, hash_map};
 use std::ops::{ControlFlow, Range};
+use std::sync::Arc;
 
 use crate::walingest::{WalIngestError, WalIngestErrorKind};
 use crate::{PERF_TRACE_TARGET, ensure_walingest};
@@ -1254,11 +1255,16 @@ impl Timeline {
         let dbdir = DbDirectory::des(&buf)?;
 
         let mut total_size: u64 = 0;
+        let mut dbdir_cnt = 0;
+        let mut rel_cnt = 0;
+
         for (spcnode, dbnode) in dbdir.dbdirs.keys() {
+            dbdir_cnt += 1;
             for rel in self
                 .list_rels(*spcnode, *dbnode, Version::at(lsn), ctx)
                 .await?
             {
+                rel_cnt += 1;
                 if self.cancel.is_cancelled() {
                     return Err(CalculateLogicalSizeError::Cancelled);
                 }
@@ -1269,6 +1275,10 @@ impl Timeline {
                 total_size += relsize as u64;
             }
         }
+
+        self.db_rel_count
+            .store(Some(Arc::new((dbdir_cnt, rel_cnt))));
+
         Ok(total_size * BLCKSZ as u64)
     }
 
```
Timeline struct: the new `db_rel_count` field, plus a drive-by refactor of `gc_compaction_state` to `ArcSwapOption`.

```diff
@@ -287,7 +287,7 @@ pub struct Timeline {
     ancestor_lsn: Lsn,
 
     // The LSN of gc-compaction that was last applied to this timeline.
-    gc_compaction_state: ArcSwap<Option<GcCompactionState>>,
+    gc_compaction_state: ArcSwapOption<GcCompactionState>,
 
     pub(crate) metrics: Arc<TimelineMetrics>,
 
@@ -450,6 +450,9 @@ pub struct Timeline {
 
     #[expect(dead_code)]
     feature_resolver: Arc<TenantFeatureResolver>,
+
+    /// Basebackup will collect the count and store it here. Used for reldirv2 rollout.
+    pub(crate) db_rel_count: ArcSwapOption<(usize, usize)>,
 }
 
 pub(crate) enum PreviousHeatmap {
@@ -3237,7 +3240,7 @@ impl Timeline {
             }),
             disk_consistent_lsn: AtomicLsn::new(disk_consistent_lsn.0),
 
-            gc_compaction_state: ArcSwap::new(Arc::new(gc_compaction_state)),
+            gc_compaction_state: ArcSwapOption::from_pointee(gc_compaction_state),
 
             last_freeze_at: AtomicLsn::new(disk_consistent_lsn.0),
             last_freeze_ts: RwLock::new(Instant::now()),
@@ -3342,6 +3345,8 @@ impl Timeline {
             basebackup_cache: resources.basebackup_cache,
 
             feature_resolver: resources.feature_resolver.clone(),
+
+            db_rel_count: ArcSwapOption::from_pointee(None),
         };
 
         result.repartition_threshold =
@@ -3413,7 +3418,7 @@ impl Timeline {
         gc_compaction_state: GcCompactionState,
     ) -> anyhow::Result<()> {
         self.gc_compaction_state
-            .store(Arc::new(Some(gc_compaction_state.clone())));
+            .store(Some(Arc::new(gc_compaction_state.clone())));
         self.remote_client
             .schedule_index_upload_for_gc_compaction_state_update(gc_compaction_state)
     }
@@ -3429,7 +3434,10 @@ impl Timeline {
     }
 
     pub(crate) fn get_gc_compaction_state(&self) -> Option<GcCompactionState> {
-        self.gc_compaction_state.load_full().as_ref().clone()
+        self.gc_compaction_state
+            .load()
+            .as_ref()
+            .map(|x| x.as_ref().clone())
     }
 
     /// Creates and starts the wal receiver.
```
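The `gc_compaction_state` hunks above swap `ArcSwap<Option<T>>` for `ArcSwapOption<T>`, the same shape the new `db_rel_count` field uses. Below is a standalone sketch of what that swap changes, assuming the `arc_swap` crate; the `GcState` type and `main` harness are stand-ins, not pageserver code.

```rust
// Sketch of the ArcSwap<Option<T>> -> ArcSwapOption<T> refactor above.
// `GcState` is a stand-in; the real code stores GcCompactionState.
use std::sync::Arc;

use arc_swap::{ArcSwap, ArcSwapOption};

#[derive(Clone, Debug, PartialEq)]
struct GcState(u64);

fn main() {
    // Before: the Option lives *inside* the Arc, so even the empty state
    // allocates an Arc<None>, and reading clones out of that allocation.
    let old = ArcSwap::new(Arc::new(None::<GcState>));
    old.store(Arc::new(Some(GcState(1))));
    let got: Option<GcState> = old.load_full().as_ref().clone();

    // After: the Option wraps the Arc, so the empty state is a null pointer
    // internally and loads expose Option<Arc<T>> directly.
    let new = ArcSwapOption::from_pointee(None::<GcState>);
    new.store(Some(Arc::new(GcState(1))));
    let also_got: Option<GcState> = new.load().as_ref().map(|x| x.as_ref().clone());

    assert_eq!(got, also_got);
}
```

With `ArcSwapOption`, stores and loads traffic in `Option<Arc<T>>` directly, which is why the `.store(..)` and `get_gc_compaction_state` call sites above changed shape.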
Regression test: starting an endpoint triggers a basebackup, which makes the new properties visible after a refresh.

```diff
@@ -50,11 +50,15 @@ def test_feature_flag(neon_env_builder: NeonEnvBuilder):
         )["result"]
     )
 
+    env.endpoints.create_start("main")  # trigger basebackup
     env.pageserver.http_client().force_refresh_feature_flag(env.initial_tenant)
 
+    # Check if the properties exist
     result = env.pageserver.http_client().evaluate_feature_flag_multivariate(
         env.initial_tenant, "test-feature-flag"
     )
 
     assert "tenant_remote_size_mb" in result["properties"]
+    assert "tenant_db_count_max" in result["properties"]
+    assert "tenant_rel_count_max" in result["properties"]
     assert "tenant_id" in result["properties"]
```
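End to end, the flow under test is: the endpoint start forces a basebackup, the basebackup stores `(dbdir_cnt, rel_cnt)` into `Timeline::db_rel_count`, `force_refresh_feature_flag` folds the per-timeline values into `tenant_db_count_max` and `tenant_rel_count_max`, and the multivariate evaluation response exposes them alongside the pre-existing `tenant_remote_size_mb` and `tenant_id` properties.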