diff --git a/postgres_ffi/README b/postgres_ffi/README index 899e8234f6..bc4c6c6806 100644 --- a/postgres_ffi/README +++ b/postgres_ffi/README @@ -1,3 +1,25 @@ -This module contains utility functions for interacting with PostgreSQL -file formats. +This module contains utilities for working with PostgreSQL file +formats. It's a collection of structs that are auto-generated from the +PostgreSQL header files using bindgen, and Rust functions to read and +manipulate them. +There are also a bunch of constants in `pg_constants.rs` that are copied +from various PostgreSQL headers, rather than auto-generated. They mostly +should be auto-generated too, but that's a TODO. + +The PostgreSQL on-disk file format is not portable across different +CPU architectures and operating systems. It is also subject to change +in each major PostgreSQL version. Currently, this module is based on +PostgreSQL v14, but in the future we will probably need a separate +copy for each PostgreSQL version. + +To interact with the C structs, there is some unsafe code in this +module. Do not copy-paste that to the rest of the codebase! Keep the +amount of unsafe code to a minimum, and limited to this module only, +and only where it's truly needed. + +TODO: Currently, there is also some code that deals with WAL records +in pageserver/src/waldecoder.rs. That should be moved into this +module. The rest of the codebase should not have intimate knowledge of +PostgreSQL file formats or WAL layout, that knowledge should be +encapsulated in this module. diff --git a/postgres_ffi/build.rs b/postgres_ffi/build.rs index b834bd99db..d1df770d51 100644 --- a/postgres_ffi/build.rs +++ b/postgres_ffi/build.rs @@ -11,27 +11,36 @@ fn main() { // to bindgen, and lets you build up options for // the resulting bindings. let bindings = bindgen::Builder::default() - // The input header we would like to generate - // bindings for. 
+ // + // All the needed PostgreSQL headers are included from 'pg_control_ffi.h' + // .header("pg_control_ffi.h") + // // Tell cargo to invalidate the built crate whenever any of the // included header files changed. + // .parse_callbacks(Box::new(bindgen::CargoCallbacks)) + // + // These are the types and constants that we want to generate bindings for + // .whitelist_type("ControlFileData") .whitelist_var("PG_CONTROL_FILE_SIZE") .whitelist_var("PG_CONTROLFILEDATA_OFFSETOF_CRC") .whitelist_type("DBState") + // // Path the server include dir. It is in tmp_install/include/server, if you did // "configure --prefix=". But if you used "configure --prefix=/", // and used DESTDIR to move it into tmp_install, then it's in // tmp_install/include/postgres/server // 'pg_config --includedir-server' would perhaps be the more proper way to find it, // but this will do for now. + // .clang_arg("-I../tmp_install/include/server") .clang_arg("-I../tmp_install/include/postgresql/server") + // // Finish the builder and generate the bindings. + // .generate() - // Unwrap the Result and panic on failure. .expect("Unable to generate bindings"); // Write the bindings to the $OUT_DIR/bindings.rs file. diff --git a/postgres_ffi/pg_control_ffi.h b/postgres_ffi/pg_control_ffi.h index 169e66977b..805ccfa724 100644 --- a/postgres_ffi/pg_control_ffi.h +++ b/postgres_ffi/pg_control_ffi.h @@ -1,4 +1,15 @@ +/* + * This header file is the input to bindgen. It includes all the + * PostgreSQL headers that we need to auto-generate Rust structs + * from. If you need to expose a new struct to Rust code, add the + * header here, and whitelist the struct in the build.rs file. + */ #include "c.h" #include "catalog/pg_control.h" +/* + * PostgreSQL uses "offsetof(ControlFileData, crc)" in multiple places to get the + * size of the control file up to the CRC, which is the last field, but there is + * no constant for it. We also need it in the Rust code. 
+ */ const uint32 PG_CONTROLFILEDATA_OFFSETOF_CRC = offsetof(ControlFileData, crc); diff --git a/postgres_ffi/src/controlfile_utils.rs b/postgres_ffi/src/controlfile_utils.rs index 6b41c11749..31e950759b 100644 --- a/postgres_ffi/src/controlfile_utils.rs +++ b/postgres_ffi/src/controlfile_utils.rs @@ -1,3 +1,28 @@ +//! +//! Utilities for reading and writing the PostgreSQL control file. +//! +//! The PostgreSQL control file is one of the first things that the PostgreSQL +//! server reads when it starts up. It indicates whether the server was shut +//! down cleanly, or if it crashed or was restored from online backup so that +//! WAL recovery needs to be performed. It also contains a copy of the latest +//! checkpoint record and its location in the WAL. +//! +//! The control file also contains fields for detecting whether the +//! data directory is compatible with a postgres binary. That includes +//! a version number, configuration options that can be set at +//! compilation time like the block size, and the platform's alignment +//! and endianness information. (The PostgreSQL on-disk file format is +//! not portable across platforms.) +//! +//! The control file is stored in the PostgreSQL data directory, as +//! `global/pg_control`. The data stored in it is designed to be smaller than +//! 512 bytes, on the assumption that it can be updated atomically. The actual +//! file is larger, 8192 bytes, but the rest of it is just filled with zeros. +//! +//! See src/include/catalog/pg_control.h in the PostgreSQL sources for more +//! information. You can use PostgreSQL's pg_controldata utility to view its +//! contents. +//! use crate::{ControlFileData, PG_CONTROLFILEDATA_OFFSETOF_CRC, PG_CONTROL_FILE_SIZE}; use bytes::{Buf, Bytes, BytesMut}; diff --git a/postgres_ffi/src/pg_constants.rs b/postgres_ffi/src/pg_constants.rs index bff7683055..760348318d 100644 --- a/postgres_ffi/src/pg_constants.rs +++ b/postgres_ffi/src/pg_constants.rs @@ -1,6 +1,11 @@ //! //!
Misc constants, copied from PostgreSQL headers. //! +//! TODO: These probably should be auto-generated using bindgen, +//! rather than copied by hand. Although on the other hand, it's nice +//! to have them all here in one place, and have the ability to add +//! comments on them. +//! // // From pg_tablespace_d.h