fix(pageserver): use a buffer for basebackup; add aux basebackup metrics log (#9401)

Our replication bench project is stuck because it is too slow to
generate basebackup and it caused compute to disconnect.

https://neondb.slack.com/archives/C03438W3FLZ/p1728330685012419

The compute timeout for waiting for basebackup is 10m (is it true?).
Generating basebackup directly on pageserver takes ~3min. Therefore, I
suspect it's because there are too many wasted round-trip time for
writing the 10000+ snapshot aux files. Also, it is possible that the
basebackup process takes too long time retrieving all aux files that it
did not write anything over the wire protocol, causing a read timeout.

Basebackup size is 800KB gzipped for that project and was 55MB tar
before compression.

## Summary of changes

* Potentially fix the issue by placing a write buffer for basebackup.
* Log how many aux files did we read + the time spent on it.

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z.
2024-10-15 16:35:21 -04:00
committed by GitHub
parent cf7a596a15
commit f1eb703256
2 changed files with 24 additions and 7 deletions

View File

@@ -16,7 +16,7 @@ use fail::fail_point;
use pageserver_api::key::Key;
use postgres_ffi::pg_constants;
use std::fmt::Write as FmtWrite;
use std::time::SystemTime;
use std::time::{Instant, SystemTime};
use tokio::io;
use tokio::io::AsyncWrite;
use tracing::*;
@@ -352,12 +352,25 @@ where
}
}
for (path, content) in self
let start_time = Instant::now();
let aux_files = self
.timeline
.list_aux_files(self.lsn, self.ctx)
.await
.map_err(|e| BasebackupError::Server(e.into()))?
{
.map_err(|e| BasebackupError::Server(e.into()))?;
let aux_scan_time = start_time.elapsed();
let aux_estimated_size = aux_files
.values()
.map(|content| content.len())
.sum::<usize>();
info!(
"Scanned {} aux files in {}ms, aux file content size = {}",
aux_files.len(),
aux_scan_time.as_millis(),
aux_estimated_size
);
for (path, content) in aux_files {
if path.starts_with("pg_replslot") {
let offs = pg_constants::REPL_SLOT_ON_DISK_OFFSETOF_RESTART_LSN;
let restart_lsn = Lsn(u64::from_le_bytes(

View File

@@ -26,8 +26,8 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::SystemTime;
use std::time::{Duration, Instant};
use tokio::io::AsyncWriteExt;
use tokio::io::{AsyncRead, AsyncWrite};
use tokio::io::{AsyncWriteExt, BufWriter};
use tokio::task::JoinHandle;
use tokio_util::sync::CancellationToken;
use tracing::*;
@@ -1137,10 +1137,10 @@ impl PageServerHandler {
.await
.map_err(map_basebackup_error)?;
} else {
let mut writer = pgb.copyout_writer();
let mut writer = BufWriter::new(pgb.copyout_writer());
if gzip {
let mut encoder = GzipEncoder::with_quality(
writer,
&mut writer,
// NOTE using fast compression because it's on the critical path
// for compute startup. For an empty database, we get
// <100KB with this method. The Level::Best compression method
@@ -1175,6 +1175,10 @@ impl PageServerHandler {
.await
.map_err(map_basebackup_error)?;
}
writer
.flush()
.await
.map_err(|e| map_basebackup_error(BasebackupError::Client(e)))?;
}
pgb.write_message_noflush(&BeMessage::CopyDone)