Enable Postgres data checksums (neondatabase/cloud#536)

We need checksums to verify data integrity when we read data from an
untrusted place (e.g. local disk) or via an untrusted communication channel
(e.g. network). At the same time, we trust the pageserver <-> redo process
communication channel, as it is just a pipe.

Here we enable calculation of data checksums in the WAL redo process and
when we extract FPI during WAL ingestion. The compute node (Postgres) will
verify the checksum of every page after receiving it back from the pageserver,
so it is pretty similar to how vanilla Postgres checks them.

There are two other places where we should verify checksums to
detect data corruption earlier:
- when we receive WAL records from safekeepers (already implemented,
  see: WalStreamDecoder::poll_decode)
- when we write layer files to disk and read them back into memory from
  local disk or S3
This commit is contained in:
Alexey Kondratov
2022-01-24 23:03:17 +03:00
parent 2501afba6e
commit 6059801943
6 changed files with 6 additions and 3 deletions

View File

@@ -55,6 +55,7 @@ impl Conf {
// Run `initdb` for this configuration's data directory with data checksums
// enabled. `-D` consumes the very next argument as the data directory, so the
// path must follow it immediately; `--data-checksums` is passed after the
// `-D <datadir>` pair rather than between the two.
let output = self
    .new_pg_command("initdb")?
    .arg("-D")
    .arg(self.datadir.as_os_str())
    .arg("--data-checksums")
    .args(&["-U", "postgres", "--no-instructions", "--no-sync"])
    .output()?;

View File

@@ -5,7 +5,7 @@ DATA_DIR=$3
PORT=$4
SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-`
rm -fr $DATA_DIR
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID
env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --data-checksums --sysid=$SYSID
echo port=$PORT >> $DATA_DIR/postgresql.conf
REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-`
declare -i WAL_SIZE=$REDO_POS+114

View File

@@ -253,6 +253,7 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> {
.args(&["-D", &initdbpath.to_string_lossy()])
.args(&["-U", &conf.superuser])
.args(&["-E", "utf8"])
.arg("--data-checksums")
.arg("--no-instructions")
// This is only used for a temporary installation that is deleted shortly after,
// so no need to fsync it

View File

@@ -619,6 +619,7 @@ impl PostgresRedoProcess {
info!("running initdb in {:?}", datadir.display());
let initdb = Command::new(conf.pg_bin_dir().join("initdb"))
.args(&["-D", &datadir.to_string_lossy()])
.arg("--data-checksums")
.arg("-N")
.env_clear()
.env("LD_LIBRARY_PATH", conf.pg_lib_dir())

View File

@@ -37,7 +37,7 @@ You can run all the tests with:
If you want to run all the tests in a particular file:
`./scripts/pytest test_pgbench.py`
`./scripts/pytest test_runner/batch_others/test_restart_compute.py`
If you want to run all tests that have the string "bench" in their names:

View File

@@ -682,7 +682,7 @@ class ProposerPostgres(PgProtocol):
def initdb(self):
""" Run initdb """
args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()]
args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path(), "--data-checksums"]
self.pg_bin.run(args)
def start(self):