Support backpressure for sharding (#7100)

Add shard_number to PageserverFeedback and parse it on the compute side.
When the compute receives a new ps_feedback, it calculates the minimum LSNs
across the feedbacks from all shards and uses those LSNs for backpressure.

Add `test_sharding_backpressure` to verify that backpressure slows down
compute to wait for the slowest shard.
This commit is contained in:
Arthur Petukhovsky
2024-03-18 22:54:44 +01:00
committed by GitHub
parent 2bc2fd9cfd
commit ad5efb49ee
11 changed files with 336 additions and 149 deletions

View File

@@ -29,12 +29,10 @@ pub struct PageserverFeedback {
// Serialize with RFC3339 format.
#[serde(with = "serde_systemtime")]
pub replytime: SystemTime,
/// Used to track feedbacks from different shards. Always zero for unsharded tenants.
pub shard_number: u32,
}
// NOTE: Do not forget to increment this number when adding new fields to PageserverFeedback.
// Do not remove previously available fields because this might be backwards incompatible.
pub const PAGESERVER_FEEDBACK_FIELDS_NUMBER: u8 = 5;
impl PageserverFeedback {
pub fn empty() -> PageserverFeedback {
PageserverFeedback {
@@ -43,6 +41,7 @@ impl PageserverFeedback {
remote_consistent_lsn: Lsn::INVALID,
disk_consistent_lsn: Lsn::INVALID,
replytime: *PG_EPOCH,
shard_number: 0,
}
}
@@ -59,17 +58,26 @@ impl PageserverFeedback {
//
// TODO: change serialized fields names once all computes migrate to rename.
pub fn serialize(&self, buf: &mut BytesMut) {
buf.put_u8(PAGESERVER_FEEDBACK_FIELDS_NUMBER); // # of keys
let buf_ptr = buf.len();
buf.put_u8(0); // # of keys, will be filled later
let mut nkeys = 0;
nkeys += 1;
buf.put_slice(b"current_timeline_size\0");
buf.put_i32(8);
buf.put_u64(self.current_timeline_size);
nkeys += 1;
buf.put_slice(b"ps_writelsn\0");
buf.put_i32(8);
buf.put_u64(self.last_received_lsn.0);
nkeys += 1;
buf.put_slice(b"ps_flushlsn\0");
buf.put_i32(8);
buf.put_u64(self.disk_consistent_lsn.0);
nkeys += 1;
buf.put_slice(b"ps_applylsn\0");
buf.put_i32(8);
buf.put_u64(self.remote_consistent_lsn.0);
@@ -80,9 +88,19 @@ impl PageserverFeedback {
.expect("failed to serialize pg_replytime earlier than PG_EPOCH")
.as_micros() as i64;
nkeys += 1;
buf.put_slice(b"ps_replytime\0");
buf.put_i32(8);
buf.put_i64(timestamp);
if self.shard_number > 0 {
nkeys += 1;
buf.put_slice(b"shard_number\0");
buf.put_i32(4);
buf.put_u32(self.shard_number);
}
buf[buf_ptr] = nkeys;
}
// Deserialize PageserverFeedback message
@@ -125,9 +143,8 @@ impl PageserverFeedback {
}
b"shard_number" => {
let len = buf.get_i32();
// TODO: this will be implemented in the next update,
// for now, we just skip the value.
buf.advance(len as usize);
assert_eq!(len, 4);
rf.shard_number = buf.get_u32();
}
_ => {
let len = buf.get_i32();
@@ -200,10 +217,7 @@ mod tests {
rf.serialize(&mut data);
// Add an extra field to the buffer and adjust number of keys
if let Some(first) = data.first_mut() {
*first = PAGESERVER_FEEDBACK_FIELDS_NUMBER + 1;
}
data[0] += 1;
data.put_slice(b"new_field_one\0");
data.put_i32(8);
data.put_u64(42);