From a71cd22e09bd62c91272e267d5c72bd985bb1eee Mon Sep 17 00:00:00 2001 From: Bojan Serafimov Date: Wed, 29 Nov 2023 15:03:20 -0500 Subject: [PATCH] Speed up rel extend --- pageserver/src/walingest.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index a9173b41e9..14a8a956c5 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -1440,20 +1440,27 @@ impl<'a> WalIngest<'a> { // record. // TODO: would be nice if to be more explicit about it let last_lsn = modification.lsn; - let old_nblocks = if !self - .timeline - .get_rel_exists(rel, last_lsn, true, ctx) - .await? + let old_nblocks = self.timeline.get_rel_size(rel, last_lsn, true, ctx).await?; + + // Create relation if not exists. + // + // NOTE: We check that old_nblocks == 0 as an optimization. At the time of + // writing, this sped up walingest by 13% on pgbench init. The bottleneck + // here is redundant calls to relation size cache and redundant locking + // in all of these functions. The proper solution is to manually inline + // and lock only once. + if old_nblocks == 0 + && !self + .timeline + .get_rel_exists(rel, last_lsn, true, ctx) + .await? { // create it with 0 size initially, the logic below will extend it modification .put_rel_creation(rel, 0, ctx) .await .context("Relation Error")?; - 0 - } else { - self.timeline.get_rel_size(rel, last_lsn, true, ctx).await? - }; + } if new_nblocks > old_nblocks { //info!("extending {} {} to {}", rel, old_nblocks, new_nblocks); @@ -2150,9 +2157,11 @@ mod tests { // Decode and ingest wal. We process the wal in chunks because // that's what happens when we get bytes from safekeepers. + let mut n_records = 0; for chunk in bytes[xlogoff..].chunks(50) { decoder.feed_bytes(chunk); while let Some((lsn, recdata)) = decoder.poll_decode().unwrap() { + n_records += 1; walingest .ingest_record(recdata, lsn, &mut modification, &mut decoded, &ctx) .await @@ -2161,6 +2170,6 @@ mod tests { } let duration = started_at.elapsed(); - println!("done in {:?}", duration); + println!("ingested {} records in {:?}", n_records, duration); } }