Files
neon/safekeeper/Cargo.toml
Vlad Lazar 7a2f0ed8d4 safekeeper: lift decoding and interpretation of WAL to the safekeeper (#9746)
## Problem

For any given tenant shard, pageservers receive all of the tenant's WAL
from the safekeeper.
This soft-blocks us from using larger shard counts due to bandwidth
concerns and CPU overhead of filtering
out the records.

## Summary of changes

This PR lifts the decoding and interpretation of WAL from the pageserver
into the safekeeper.

A customised PG replication protocol is used where instead of sending
raw WAL, the safekeeper sends
filtered, interpreted records. The receiver drives the protocol
selection, so, on the pageserver side, usage
of the new protocol is gated by a new pageserver config:
`wal_receiver_protocol`.

 More granularly the changes are:
1. Optionally inject the protocol and shard identity into the arguments
used for starting replication
2. On the safekeeper side, implement a new wal sending primitive which
decodes and interprets records
 before sending them over
3. On the pageserver side, implement the ingestion of this new
replication message type. It's very similar
 to what we already have for raw wal (minus decoding and interpreting).
 
 ## Notes
 
* This PR currently uses my [branch of
rust-postgres](https://github.com/neondatabase/rust-postgres/tree/vlad/interpreted-wal-record-replication-support)
which includes the deserialization logic for the new replication message
type. PR for that is open
[here](https://github.com/neondatabase/rust-postgres/pull/32).
* This PR contains changes for both pageservers and safekeepers. It's
safe to merge because the new protocol is disabled by default on the
pageserver side. We can gradually start enabling it in subsequent
releases.
* CI tests are running on https://github.com/neondatabase/neon/pull/9747
 
 ## Links
 
 Related: https://github.com/neondatabase/neon/issues/9336
 Epic: https://github.com/neondatabase/neon/issues/9329
2024-11-25 17:29:28 +00:00

78 lines
2.0 KiB
TOML

[package]
name = "safekeeper"
version = "0.1.0"
edition.workspace = true
license.workspace = true
[features]
default = []
# Enables test-only APIs, incuding failpoints. In particular, enables the `fail_point!` macro,
# which adds some runtime cost to run tests on outage conditions
testing = ["fail/failpoints"]
[dependencies]
async-stream.workspace = true
anyhow.workspace = true
byteorder.workspace = true
bytes.workspace = true
camino.workspace = true
camino-tempfile.workspace = true
chrono.workspace = true
clap = { workspace = true, features = ["derive"] }
crc32c.workspace = true
fail.workspace = true
hex.workspace = true
humantime.workspace = true
http.workspace = true
hyper0.workspace = true
futures.workspace = true
once_cell.workspace = true
parking_lot.workspace = true
pageserver_api.workspace = true
postgres.workspace = true
postgres-protocol.workspace = true
pprof.workspace = true
rand.workspace = true
regex.workspace = true
scopeguard.workspace = true
reqwest = { workspace = true, features = ["json"] }
serde.workspace = true
serde_json.workspace = true
strum.workspace = true
strum_macros.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["fs"] }
tokio-util = { workspace = true }
tokio-io-timeout.workspace = true
tokio-postgres.workspace = true
tokio-tar.workspace = true
tracing.workspace = true
url.workspace = true
metrics.workspace = true
postgres_backend.workspace = true
postgres_ffi.workspace = true
pq_proto.workspace = true
remote_storage.workspace = true
safekeeper_api.workspace = true
sha2.workspace = true
sd-notify.workspace = true
storage_broker.workspace = true
tokio-stream.workspace = true
utils.workspace = true
wal_decoder.workspace = true
workspace_hack.workspace = true
[dev-dependencies]
criterion.workspace = true
itertools.workspace = true
walproposer.workspace = true
rand.workspace = true
desim.workspace = true
tracing.workspace = true
tracing-subscriber = { workspace = true, features = ["json"] }
[[bench]]
name = "receive_wal"
harness = false