wal_decoder: reuse codec throughout sender/receiver lifetime

Problem

Previously, we called `from_wire` and `to_wire` inline to encode and
decode record batches. This meant we always had to match on the format
and, more importantly, it did not allow for reuse of the zstd
encoder/decoder.

Summary of Changes

Refactor such that the encoder and decoder can have the same lifetime
as the sender/receiver session.
This commit is contained in:
Vlad Lazar
2024-11-26 16:50:17 +01:00
parent 9e0148de11
commit 6666f6807b
8 changed files with 528 additions and 91 deletions

View File

@@ -9,6 +9,7 @@ testing = ["pageserver_api/testing"]
[dependencies]
async-compression.workspace = true
async-trait.workspace = true
anyhow.workspace = true
bytes.workspace = true
pageserver_api.workspace = true

View File

@@ -0,0 +1,192 @@
use bytes::{BufMut, Bytes, BytesMut};
use prost::Message;
use tokio::io::AsyncWriteExt;
use utils::postgres_client::{Compression, InterpretedFormat};
use crate::models::proto;
use crate::models::InterpretedWalRecords;
use crate::protobuf_conversions::TranscodeError;
use utils::bin_ser::{BeSer, DeserializeError, SerializeError};
/// Errors returned by [`Encoder::encode`].
///
/// Every variant displays as the message of the error it wraps (`"{0}"`).
#[derive(Debug, thiserror::Error)]
pub enum EncodeError {
/// A `utils::bin_ser` serialization failure.
#[error("{0}")]
Bincode(#[from] SerializeError),
/// A failure while producing the protobuf representation.
#[error("{0}")]
Protobuf(#[from] ProtobufSerializeError),
/// An I/O error; in this file these originate from the zstd compression stream.
#[error("{0}")]
Compression(#[from] std::io::Error),
}
/// Failure modes specific to the protobuf encoding path.
#[derive(Debug, thiserror::Error)]
pub enum ProtobufSerializeError {
/// A `utils::bin_ser` serialization failure.
// NOTE(review): presumably raised when the metadata record embedded in a
// protobuf message cannot be serialized -- confirm against the conversions.
#[error("{0}")]
MetadataRecord(#[from] SerializeError),
/// `prost` could not encode the message into the output buffer.
#[error("{0}")]
Encode(#[from] prost::EncodeError),
}
/// Errors returned by [`Decoder::decode`].
///
/// Every variant displays as the message of the error it wraps (`"{0}"`).
#[derive(Debug, thiserror::Error)]
pub enum DecodeError {
/// A `utils::bin_ser` deserialization failure.
#[error("{0}")]
Bincode(#[from] DeserializeError),
/// A failure while reading the protobuf representation.
#[error("{0}")]
Protobuf(#[from] ProtobufDeserializeError),
/// An I/O error; in this file these originate from the zstd decompression stream.
#[error("{0}")]
Decompress(#[from] std::io::Error),
}
/// Failure modes specific to the protobuf decoding path.
#[derive(Debug, thiserror::Error)]
pub enum ProtobufDeserializeError {
/// The decoded protobuf message could not be converted into the in-memory model.
#[error("{0}")]
Transcode(#[from] TranscodeError),
/// `prost` could not decode the bytes into a protobuf message.
#[error("{0}")]
Decode(#[from] prost::DecodeError),
}
/// Builds the boxed [`Encoder`] that corresponds to the requested wire `format`.
///
/// `compression` is stored in the encoder unchanged; `None` means the encoded
/// bytes are returned without compression.
pub fn encoder_from_proto(
    format: InterpretedFormat,
    compression: Option<Compression>,
) -> Box<dyn Encoder> {
    // Exhaustive on purpose: a new format must pick an encoder explicitly.
    match format {
        InterpretedFormat::Protobuf => Box::new(ProtobufEncoder { compression }),
        InterpretedFormat::Bincode => Box::new(BincodeEncoder { compression }),
    }
}
/// Builds the boxed [`Decoder`] that corresponds to the requested wire `format`.
///
/// `compression` is stored in the decoder unchanged; `None` means incoming
/// buffers are treated as uncompressed.
pub fn make_decoder(
    format: InterpretedFormat,
    compression: Option<Compression>,
) -> Box<dyn Decoder> {
    // Exhaustive on purpose: a new format must pick a decoder explicitly.
    match format {
        InterpretedFormat::Protobuf => Box::new(ProtobufDecoder { compression }),
        InterpretedFormat::Bincode => Box::new(BincodeDecoder { compression }),
    }
}
/// Turns a batch of interpreted WAL records into its wire representation.
///
/// Implementations must be `Send + Sync`; `encode` takes `&self`.
#[async_trait::async_trait]
pub trait Encoder: Send + Sync {
/// Consumes `records` and returns the encoded bytes, or an [`EncodeError`].
async fn encode(&self, records: InterpretedWalRecords) -> Result<Bytes, EncodeError>;
}
/// Reconstructs a batch of interpreted WAL records from its wire representation.
///
/// Implementations must be `Send + Sync`; `decode` takes `&self`.
#[async_trait::async_trait]
pub trait Decoder: Send + Sync {
/// Decodes `buf` into an [`InterpretedWalRecords`], or returns a [`DecodeError`].
async fn decode(&self, buf: &Bytes) -> Result<InterpretedWalRecords, DecodeError>;
}
/// [`Decoder`] for buffers produced with `InterpretedFormat::Bincode`.
struct BincodeDecoder {
    /// Compression the incoming buffers were written with; `None` means raw.
    compression: Option<Compression>,
}

#[async_trait::async_trait]
impl Decoder for BincodeDecoder {
    /// Decompresses `buf` if compression is configured, then deserializes it
    /// with `InterpretedWalRecords::des`.
    async fn decode(&self, buf: &Bytes) -> Result<InterpretedWalRecords, DecodeError> {
        let payload = match self.compression {
            None => buf.clone(),
            Some(Compression::Zstd { .. }) => {
                use async_compression::tokio::write::ZstdDecoder;

                let mut inflated = Vec::with_capacity(buf.len());
                let mut zstd = ZstdDecoder::new(&mut inflated);
                zstd.write_all(buf).await?;
                // NOTE(review): only `flush` is called here, not `shutdown`;
                // confirm that a truncated zstd frame is still rejected.
                zstd.flush().await?;
                Bytes::from(inflated)
            }
        };

        // `?` converts the `DeserializeError` into `DecodeError::Bincode`.
        Ok(InterpretedWalRecords::des(&payload)?)
    }
}
/// [`Encoder`] producing buffers for `InterpretedFormat::Bincode`.
struct BincodeEncoder {
    /// Compression applied to the serialized bytes; `None` leaves them raw.
    compression: Option<Compression>,
}

#[async_trait::async_trait]
impl Encoder for BincodeEncoder {
    /// Serializes `records` with `ser_into` and, if configured, compresses the
    /// result with zstd at the configured level.
    async fn encode(&self, records: InterpretedWalRecords) -> Result<Bytes, EncodeError> {
        use async_compression::tokio::write::ZstdEncoder;
        use async_compression::Level;

        let mut writer = BytesMut::new().writer();
        records.ser_into(&mut writer)?;
        let serialized = writer.into_inner().freeze();

        match self.compression {
            None => Ok(serialized),
            Some(Compression::Zstd { level }) => {
                // Output buffer starts at a quarter of the input size.
                let sink = Vec::with_capacity(serialized.len() / 4);
                let mut zstd = ZstdEncoder::with_quality(sink, Level::Precise(level as i32));
                zstd.write_all(&serialized).await?;
                // `shutdown` finalizes the compressed stream before we take the buffer.
                zstd.shutdown().await?;
                Ok(Bytes::from(zstd.into_inner()))
            }
        }
    }
}
struct ProtobufDecoder {
compression: Option<Compression>,
}
#[async_trait::async_trait]
impl Decoder for ProtobufDecoder {
async fn decode(&self, buf: &Bytes) -> Result<InterpretedWalRecords, DecodeError> {
let decompressed_buf = match self.compression {
Some(Compression::Zstd { .. }) => {
use async_compression::tokio::write::ZstdDecoder;
let mut decoded_buf = Vec::with_capacity(buf.len());
let mut decoder = ZstdDecoder::new(&mut decoded_buf);
decoder.write_all(buf).await?;
decoder.flush().await?;
Bytes::from(decoded_buf)
}
None => buf.clone(),
};
let proto = proto::InterpretedWalRecords::decode(decompressed_buf)
.map_err(|e| DecodeError::Protobuf(e.into()))?;
InterpretedWalRecords::try_from(proto).map_err(|e| DecodeError::Protobuf(e.into()))
}
}
struct ProtobufEncoder {
compression: Option<Compression>,
}
#[async_trait::async_trait]
impl Encoder for ProtobufEncoder {
async fn encode(&self, records: InterpretedWalRecords) -> Result<Bytes, EncodeError> {
use async_compression::tokio::write::ZstdEncoder;
use async_compression::Level;
let proto: proto::InterpretedWalRecords = records.try_into()?;
let mut buf = BytesMut::new();
proto
.encode(&mut buf)
.map_err(|e| EncodeError::Protobuf(e.into()))?;
let buf = buf.freeze();
let compressed_buf = match self.compression {
Some(Compression::Zstd { level }) => {
let mut encoder = ZstdEncoder::with_quality(
Vec::with_capacity(buf.len() / 4),
Level::Precise(level as i32),
);
encoder.write_all(&buf).await?;
encoder.shutdown().await?;
Bytes::from(encoder.into_inner())
}
None => buf,
};
Ok(compressed_buf)
}
}

View File

@@ -1,4 +1,5 @@
pub mod codec;
pub mod decoder;
pub mod models;
pub mod protobuf_conversions;
pub mod serialized_batch;
pub mod wire_format;

View File

@@ -0,0 +1,220 @@
use pageserver_api::key::CompactKey;
use utils::bin_ser::{BeSer, DeserializeError, SerializeError};
use utils::lsn::Lsn;
use crate::models::{
FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords, MetadataRecord,
};
use crate::serialized_batch::{
ObservedValueMeta, SerializedValueBatch, SerializedValueMeta, ValueMeta,
};
use crate::models::proto;
/// Errors raised when converting a protobuf message into the in-memory model.
#[derive(Debug, thiserror::Error)]
pub enum TranscodeError {
/// The message is structurally invalid (for example a required field is
/// missing); the payload is a human-readable description.
#[error("{0}")]
BadInput(String),
/// A `utils::bin_ser` deserialization failure.
#[error("{0}")]
MetadataRecord(#[from] DeserializeError),
}
impl TryFrom<InterpretedWalRecords> for proto::InterpretedWalRecords {
type Error = SerializeError;
fn try_from(value: InterpretedWalRecords) -> Result<Self, Self::Error> {
let records = value
.records
.into_iter()
.map(proto::InterpretedWalRecord::try_from)
.collect::<Result<Vec<_>, _>>()?;
Ok(proto::InterpretedWalRecords {
records,
next_record_lsn: value.next_record_lsn.map(|l| l.0),
})
}
}
/// Converts a single interpreted record into its protobuf message.
///
/// The optional metadata record is serialized with `ser_into` into an opaque
/// byte vector; that serialization is the only fallible step.
impl TryFrom<InterpretedWalRecord> for proto::InterpretedWalRecord {
    type Error = SerializeError;

    fn try_from(value: InterpretedWalRecord) -> Result<Self, Self::Error> {
        let metadata_record = match value.metadata_record {
            Some(meta_rec) => {
                let mut bytes = Vec::new();
                meta_rec.ser_into(&mut bytes)?;
                Some(bytes)
            }
            None => None,
        };
        let flush_uncommitted = matches!(value.flush_uncommitted, FlushUncommittedRecords::Yes);
        let batch = proto::SerializedValueBatch::from(value.batch);

        Ok(proto::InterpretedWalRecord {
            metadata_record,
            batch: Some(batch),
            next_record_lsn: value.next_record_lsn.0,
            flush_uncommitted,
            xid: value.xid,
        })
    }
}
impl From<SerializedValueBatch> for proto::SerializedValueBatch {
fn from(value: SerializedValueBatch) -> Self {
proto::SerializedValueBatch {
raw: value.raw,
metadata: value
.metadata
.into_iter()
.map(proto::ValueMeta::from)
.collect(),
max_lsn: value.max_lsn.0,
len: value.len as u64,
}
}
}
impl From<ValueMeta> for proto::ValueMeta {
fn from(value: ValueMeta) -> Self {
match value {
ValueMeta::Observed(obs) => proto::ValueMeta {
r#type: proto::ValueMetaType::Observed.into(),
key: Some(proto::CompactKey::from(obs.key)),
lsn: obs.lsn.0,
batch_offset: None,
len: None,
will_init: None,
},
ValueMeta::Serialized(ser) => proto::ValueMeta {
r#type: proto::ValueMetaType::Serialized.into(),
key: Some(proto::CompactKey::from(ser.key)),
lsn: ser.lsn.0,
batch_offset: Some(ser.batch_offset),
len: Some(ser.len as u64),
will_init: Some(ser.will_init),
},
}
}
}
/// Splits the raw key value into two 64-bit halves for the protobuf message.
impl From<CompactKey> for proto::CompactKey {
    fn from(value: CompactKey) -> Self {
        let raw = value.raw();
        // Upper half: shift down, then keep the low 64 bits of the result.
        let high = (raw >> 64) as i64;
        // Lower half: the cast keeps only the low 64 bits (bit pattern preserved).
        let low = raw as i64;
        proto::CompactKey { high, low }
    }
}
/// Rebuilds a record batch from its protobuf message.
///
/// Fails with the first `TranscodeError` produced while converting an
/// individual record.
impl TryFrom<proto::InterpretedWalRecords> for InterpretedWalRecords {
    type Error = TranscodeError;

    fn try_from(value: proto::InterpretedWalRecords) -> Result<Self, Self::Error> {
        let proto::InterpretedWalRecords {
            records,
            next_record_lsn,
        } = value;

        // Collecting into `Result` stops at the first failing record.
        let records = records
            .into_iter()
            .map(InterpretedWalRecord::try_from)
            .collect::<Result<_, _>>()?;

        Ok(InterpretedWalRecords {
            records,
            next_record_lsn: next_record_lsn.map(Lsn::from),
        })
    }
}
/// Rebuilds a single interpreted record from its protobuf message.
///
/// # Errors
///
/// * `TranscodeError::MetadataRecord` if the embedded metadata bytes do not
///   deserialize.
/// * `TranscodeError::BadInput` if `batch` is absent, or whatever error the
///   batch conversion itself reports.
impl TryFrom<proto::InterpretedWalRecord> for InterpretedWalRecord {
    type Error = TranscodeError;

    fn try_from(value: proto::InterpretedWalRecord) -> Result<Self, Self::Error> {
        // The metadata record is checked first, then the batch.
        let metadata_record = match value.metadata_record {
            Some(bytes) => Some(MetadataRecord::des(&bytes)?),
            None => None,
        };

        let batch = match value.batch {
            Some(batch) => SerializedValueBatch::try_from(batch)?,
            None => {
                return Err(TranscodeError::BadInput(
                    "InterpretedWalRecord::batch missing".to_string(),
                ))
            }
        };

        let flush_uncommitted = match value.flush_uncommitted {
            true => FlushUncommittedRecords::Yes,
            false => FlushUncommittedRecords::No,
        };

        Ok(InterpretedWalRecord {
            metadata_record,
            batch,
            next_record_lsn: Lsn(value.next_record_lsn),
            flush_uncommitted,
            xid: value.xid,
        })
    }
}
impl TryFrom<proto::SerializedValueBatch> for SerializedValueBatch {
type Error = TranscodeError;
fn try_from(value: proto::SerializedValueBatch) -> Result<Self, Self::Error> {
let metadata = value
.metadata
.into_iter()
.map(ValueMeta::try_from)
.collect::<Result<Vec<_>, _>>()?;
Ok(SerializedValueBatch {
raw: value.raw,
metadata,
max_lsn: Lsn(value.max_lsn),
len: value.len as usize,
})
}
}
impl TryFrom<proto::ValueMeta> for ValueMeta {
type Error = TranscodeError;
fn try_from(value: proto::ValueMeta) -> Result<Self, Self::Error> {
match proto::ValueMetaType::try_from(value.r#type) {
Ok(proto::ValueMetaType::Serialized) => {
Ok(ValueMeta::Serialized(SerializedValueMeta {
key: value
.key
.ok_or_else(|| {
TranscodeError::BadInput("ValueMeta::key missing".to_string())
})?
.into(),
lsn: Lsn(value.lsn),
batch_offset: value.batch_offset.ok_or_else(|| {
TranscodeError::BadInput("ValueMeta::batch_offset missing".to_string())
})?,
len: value.len.ok_or_else(|| {
TranscodeError::BadInput("ValueMeta::len missing".to_string())
})? as usize,
will_init: value.will_init.ok_or_else(|| {
TranscodeError::BadInput("ValueMeta::will_init missing".to_string())
})?,
}))
}
Ok(proto::ValueMetaType::Observed) => Ok(ValueMeta::Observed(ObservedValueMeta {
key: value
.key
.ok_or_else(|| TranscodeError::BadInput("ValueMeta::key missing".to_string()))?
.into(),
lsn: Lsn(value.lsn),
})),
Err(_) => Err(TranscodeError::BadInput(format!(
"Unexpected ValueMeta::type {}",
value.r#type
))),
}
}
}
/// Reassembles the 128-bit raw key from the two 64-bit halves of the protobuf
/// message.
impl From<proto::CompactKey> for CompactKey {
    fn from(value: proto::CompactKey) -> Self {
        // The high half keeps its sign: it supplies the top 64 bits.
        let high = i128::from(value.high) << 64;
        // The low half must be zero-extended. Casting the signed `low` straight
        // to `i128` would sign-extend, and a set bit 63 would then overwrite
        // every bit of `high` with ones when the halves are OR-ed together.
        let low = i128::from(value.low as u64);
        (high | low).into()
    }
}