From 605980194357c59cdf42e603935647214ec6e2b0 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Mon, 24 Jan 2022 23:03:17 +0300 Subject: [PATCH] Enable Postgres data checksums (neondatabase/cloud#536) We need checksums to verify data integrity, when we read it from untrusted place (e.g. local disk) or via untrusted communication channel (e.g. network). At the same time, we trust pageserver <-> redo process communication channel, as it is just a pipe. Here we enable calculation of data checksums in the wal redo process and when we extract FPI during WAL injestion. Compute node (Postgres) will verify checksum of every page after receiving it back from pageserver. So it is pretty similar to how vanilla Postgres checks them. There are two other places where we should verify checksums to detect data corruption earlier: - when we receive WAL records from safekeepers (already implemented, see: WalStreamDecoder::poll_decode) - when we write layer files to disk and read back in memory from local disk or S3 --- libs/postgres_ffi/wal_generate/src/lib.rs | 1 + libs/utils/scripts/restore_from_wal.sh | 2 +- pageserver/src/timelines.rs | 1 + pageserver/src/walredo.rs | 1 + test_runner/README.md | 2 +- test_runner/batch_others/test_wal_acceptor.py | 2 +- 6 files changed, 6 insertions(+), 3 deletions(-) diff --git a/libs/postgres_ffi/wal_generate/src/lib.rs b/libs/postgres_ffi/wal_generate/src/lib.rs index 2b3f5ef703..37fd805e27 100644 --- a/libs/postgres_ffi/wal_generate/src/lib.rs +++ b/libs/postgres_ffi/wal_generate/src/lib.rs @@ -55,6 +55,7 @@ impl Conf { let output = self .new_pg_command("initdb")? .arg("-D") + .arg("--data-checksums") .arg(self.datadir.as_os_str()) .args(&["-U", "postgres", "--no-instructions", "--no-sync"]) .output()?; diff --git a/libs/utils/scripts/restore_from_wal.sh b/libs/utils/scripts/restore_from_wal.sh index 9bd860affb..e0d8018356 100755 --- a/libs/utils/scripts/restore_from_wal.sh +++ b/libs/utils/scripts/restore_from_wal.sh @@ -5,7 +5,7 @@ DATA_DIR=$3 PORT=$4 SYSID=`od -A n -j 24 -N 8 -t d8 $WAL_PATH/000000010000000000000002* | cut -c 3-` rm -fr $DATA_DIR -env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --sysid=$SYSID +env -i LD_LIBRARY_PATH=$PG_BIN/../lib $PG_BIN/initdb -E utf8 -U cloud_admin -D $DATA_DIR --data-checksums --sysid=$SYSID echo port=$PORT >> $DATA_DIR/postgresql.conf REDO_POS=0x`$PG_BIN/pg_controldata -D $DATA_DIR | fgrep "REDO location"| cut -c 42-` declare -i WAL_SIZE=$REDO_POS+114 diff --git a/pageserver/src/timelines.rs b/pageserver/src/timelines.rs index a3939661c1..b95db1a4d7 100644 --- a/pageserver/src/timelines.rs +++ b/pageserver/src/timelines.rs @@ -253,6 +253,7 @@ fn run_initdb(conf: &'static PageServerConf, initdbpath: &Path) -> Result<()> { .args(&["-D", &initdbpath.to_string_lossy()]) .args(&["-U", &conf.superuser]) .args(&["-E", "utf8"]) + .arg("--data-checksums") .arg("--no-instructions") // This is only used for a temporary installation that is deleted shortly after, // so no need to fsync it diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index cad211b1bd..22f280d5bd 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -619,6 +619,7 @@ impl PostgresRedoProcess { info!("running initdb in {:?}", datadir.display()); let initdb = Command::new(conf.pg_bin_dir().join("initdb")) .args(&["-D", &datadir.to_string_lossy()]) + .arg("--data-checksums") .arg("-N") .env_clear() .env("LD_LIBRARY_PATH", conf.pg_lib_dir()) diff --git a/test_runner/README.md b/test_runner/README.md index 4b54c45175..1edbcb2972 100644 --- a/test_runner/README.md +++ b/test_runner/README.md @@ -37,7 +37,7 @@ You can run all the tests with: If you want to run all the tests in a particular file: -`./scripts/pytest test_pgbench.py` +`./scripts/pytest test_runner/batch_others/test_restart_compute.py` If you want to run all tests that have the string "bench" in their names: diff --git a/test_runner/batch_others/test_wal_acceptor.py b/test_runner/batch_others/test_wal_acceptor.py index 9b876f780d..b65dee9c32 100644 --- a/test_runner/batch_others/test_wal_acceptor.py +++ b/test_runner/batch_others/test_wal_acceptor.py @@ -682,7 +682,7 @@ class ProposerPostgres(PgProtocol): def initdb(self): """ Run initdb """ - args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path()] + args = ["initdb", "-U", "cloud_admin", "-D", self.pg_data_dir_path(), "--data-checksums"] self.pg_bin.run(args) def start(self):