From 330d9a8b024be4ab55384ebf746fd0b704cbd018 Mon Sep 17 00:00:00 2001 From: abhijeet Date: Thu, 4 Jan 2024 13:09:41 +0000 Subject: [PATCH] testing if santiser work moved sanitizers in its owm workflow merged all jobs into onme cleaned up failing job cleaned up failing job running just tests fixing build reverting changes fixing linter error and build error clearning up job added wal and extension builds fixing build fixing build fixing build added use sanitizer patch testing if sanitiser work in main workflow fixed format issue fixing format issue fixing format issue added flags disabled flags enabling flags enabling flags added more options to flag fixing build fixing build testing the regression run added asan and usban flag for regression test commented unit test and release build fixing build fix neon for sanitizers enabled unit test updated branch to test the fix updated branch to test the fix updated the commit id fixing build restoring the submodules to main updated git modules and revision of commit updated postgres 16 vendor dir removed test --- .github/workflows/build_and_test.yml | 81 +++++++++++++++++-------- .gitmodules | 2 +- Makefile | 4 +- control_plane/src/background_process.rs | 8 ++- libs/postgres_ffi/wal_craft/src/lib.rs | 8 +++ libs/utils/scripts/restore_from_wal.sh | 2 +- libs/walproposer/build.rs | 3 + pageserver/src/tenant.rs | 8 +++ pgxn/neon/file_cache.c | 10 +-- pgxn/neon/walproposer.c | 3 +- test_runner/regress/test_pg_regress.py | 6 +- vendor/postgres-v16 | 2 +- vendor/revisions.json | 6 +- 13 files changed, 102 insertions(+), 41 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 2b88f09b3d..4482b8d9da 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -141,7 +141,7 @@ jobs: # Some of our rust modules use FFI and need those to be checked - name: Get postgres headers - run: make postgres-headers -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" make postgres-headers -j$(nproc) # cargo hack runs the given cargo subcommand (clippy in this case) for all feature combinations. # This will catch compiler & clippy warnings in all feature combinations. @@ -260,18 +260,29 @@ jobs: # # We run tests with addtional features, that are turned off by default (e.g. in release builds), see # corresponding Cargo.toml files for their descriptions. + # ASAN_OPTIONS and UBSAN_OPTIONS are flags to enable sanitizers on build that will test for memory leaks and undefined behaviour - name: Set env variables run: | CARGO_FEATURES="--features testing" if [[ $BUILD_TYPE == "debug" ]]; then cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run" CARGO_FLAGS="--locked" + ASAN_OPTIONS="detect_leaks=0:abort_on_error=1:print_stacktrace=1:disable_coredump=0:\ + strict_string_checks=1:check_initialization_order=1:strict_init_order=1:detect_stack_use_after_return=0" + UBSAN_OPTIONS="abort_on_error=1:print_stacktrace=1" + RUSTFLAGS="-C linker=clang" elif [[ $BUILD_TYPE == "release" ]]; then cov_prefix="" + ASAN_OPTIONS="" + UBSAN_OPTIONS="" + RUSTFLAGS="" CARGO_FLAGS="--locked --release" fi { echo "cov_prefix=${cov_prefix}" + echo "ASAN_OPTIONS=${ASAN_OPTIONS}" + echo "UBSAN_OPTIONS=${UBSAN_OPTIONS}" + echo "RUSTFLAGS=${RUSTFLAGS}" echo "CARGO_FEATURES=${CARGO_FEATURES}" echo "CARGO_FLAGS=${CARGO_FLAGS}" echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo" @@ -319,46 +330,46 @@ jobs: - name: Build postgres v14 if: steps.cache_pg_14.outputs.cache-hit != 'true' - run: mold -run make postgres-v14 -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" mold -run make postgres-v14 -j$(nproc) - name: Build postgres v15 if: steps.cache_pg_15.outputs.cache-hit != 'true' - run: mold -run make postgres-v15 -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" mold -run make postgres-v15 -j$(nproc) - name: Build postgres v16 if: steps.cache_pg_16.outputs.cache-hit != 'true' - run: mold -run make postgres-v16 -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" mold -run make postgres-v16 -j$(nproc) - name: Build neon extensions - run: mold -run make neon-pg-ext -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" mold -run make neon-pg-ext -j$(nproc) - name: Build walproposer-lib - run: mold -run make walproposer-lib -j$(nproc) + run: CC=clang CARGO_BUILD_FLAGS="--features=testing" mold -run make walproposer-lib -j$(nproc) - name: Run cargo build run: | ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins --tests - - name: Run rust tests - run: | - ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES + # - name: Run rust tests + # run: | + # ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES - # Run separate tests for real S3 - export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty - export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests - export REMOTE_STORAGE_S3_REGION=eu-central-1 - # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now - ${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_s3)' + # # Run separate tests for real S3 + # export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty + # export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests + # export REMOTE_STORAGE_S3_REGION=eu-central-1 + # # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now + # ${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_s3)' - # Run separate tests for real Azure Blob Storage - # XXX: replace region with `eu-central-1`-like region - export ENABLE_REAL_AZURE_REMOTE_STORAGE=y - export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}" - export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}" - export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}" - export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}" - # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now - ${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_azure)' + # # Run separate tests for real Azure Blob Storage + # # XXX: replace region with `eu-central-1`-like region + # export ENABLE_REAL_AZURE_REMOTE_STORAGE=y + # export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}" + # export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}" + # export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}" + # export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}" + # # Avoid `$CARGO_FEATURES` since there's no `testing` feature in the e2e tests now + # ${cov_prefix} cargo nextest run $CARGO_FLAGS -E 'package(remote_storage)' -E 'test(test_real_azure)' - name: Install rust binaries run: | @@ -424,8 +435,10 @@ jobs: strategy: fail-fast: false matrix: - build_type: [ debug, release ] + build_type: [ debug ] pg_version: [ v14, v15, v16 ] + env: + BUILD_TYPE: ${{ matrix.build_type }} steps: - name: Checkout uses: actions/checkout@v3 @@ -433,6 +446,24 @@ jobs: submodules: true fetch-depth: 1 + # Set environment variable used by application at runtime + # ASAN_OPTIONS and UBSAN_OPTIONS are flags to enable sanitizers on build + # The above sanitizers will test for memory leaks and undefined behaviour + - name: Set env variables + run: | + if [[ $BUILD_TYPE == "debug" ]]; then + ASAN_OPTIONS="detect_leaks=0:abort_on_error=1:print_stacktrace=1:disable_coredump=0:\ + strict_string_checks=1:check_initialization_order=1:strict_init_order=1:detect_stack_use_after_return=0" + UBSAN_OPTIONS="abort_on_error=1:print_stacktrace=1" + elif [[ $BUILD_TYPE == "release" ]]; then + ASAN_OPTIONS="" + UBSAN_OPTIONS="" + fi + { + echo "ASAN_OPTIONS=${ASAN_OPTIONS}" + echo "UBSAN_OPTIONS=${UBSAN_OPTIONS}" + } >> $GITHUB_ENV + - name: Pytest regression tests uses: ./.github/actions/run-python-test-set with: diff --git a/.gitmodules b/.gitmodules index 1d925674a1..9fd6fd3cec 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,4 +9,4 @@ [submodule "vendor/postgres-v16"] path = vendor/postgres-v16 url = https://github.com/neondatabase/postgres.git - branch = REL_16_STABLE_neon + branch = add-build-sanitizers diff --git a/Makefile b/Makefile index 004ca3fbcf..0ae648eafc 100644 --- a/Makefile +++ b/Makefile @@ -23,7 +23,7 @@ endif UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Linux) # Seccomp BPF is only available for Linux - PG_CONFIGURE_OPTS += --with-libseccomp + NO_PG_CONFIGURE_OPTS += --with-libseccomp # libseccomp needs additional adjustments else ifeq ($(UNAME_S),Darwin) # macOS with brew-installed openssl requires explicit paths # It can be configured with OPENSSL_PREFIX variable @@ -80,6 +80,8 @@ $(POSTGRES_INSTALL_DIR)/build/%/config.status: (cd $(POSTGRES_INSTALL_DIR)/build/$* && \ env PATH="$(EXTRA_PATH_OVERRIDES):$$PATH" $(ROOT_PROJECT_DIR)/vendor/postgres-$*/configure \ CFLAGS='$(PG_CFLAGS)' \ + CPPFLAGS='-fsanitize=address -fsanitize=undefined -fno-sanitize-recover -fno-sanitize=function -Wno-cast-function-type-strict' \ + LDFLAGS='-fsanitize=address -fsanitize=undefined -static-libsan' \ $(PG_CONFIGURE_OPTS) \ --prefix=$(abspath $(POSTGRES_INSTALL_DIR))/$* > configure.log) diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index 20fa3af9b8..f291b68ae3 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -230,7 +230,13 @@ fn fill_rust_env_vars(cmd: &mut Command) -> &mut Command { let mut filled_cmd = cmd.env_clear().env("RUST_BACKTRACE", backtrace_setting); // Pass through these environment variables to the command - for var in ["LLVM_PROFILE_FILE", "FAILPOINTS", "RUST_LOG"] { + for var in [ + "LLVM_PROFILE_FILE", + "FAILPOINTS", + "RUST_LOG", + "ASAN_OPTIONS", + "UBSAN_OPTIONS", + ] { if let Some(val) = std::env::var_os(var) { filled_cmd = filled_cmd.env(var, val); } diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs index 281a180e3b..2de4e59d28 100644 --- a/libs/postgres_ffi/wal_craft/src/lib.rs +++ b/libs/postgres_ffi/wal_craft/src/lib.rs @@ -74,6 +74,14 @@ impl Conf { ensure!(path.exists(), "Command {:?} does not exist", path); let mut cmd = Command::new(path); cmd.env_clear() + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ) .env("LD_LIBRARY_PATH", self.pg_lib_dir()?) .env("DYLD_LIBRARY_PATH", self.pg_lib_dir()?); Ok(cmd) diff --git a/libs/utils/scripts/restore_from_wal.sh b/libs/utils/scripts/restore_from_wal.sh index 316ec8ed0d..13fc69bbc4 100755 --- a/libs/utils/scripts/restore_from_wal.sh +++ b/libs/utils/scripts/restore_from_wal.sh @@ -8,7 +8,7 @@ DATA_DIR=$3 PORT=$4 SYSID=$(od -A n -j 24 -N 8 -t d8 "$WAL_PATH"/000000010000000000000002* | cut -c 3-) rm -fr "$DATA_DIR" -env -i LD_LIBRARY_PATH="$PG_BIN"/../lib "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID" +env -i LD_LIBRARY_PATH="$PG_BIN"/../lib ASAN_OPTIONS="$ASAN_OPTIONS" UBSAN_OPTIONS="$UBSAN_OPTIONS" "$PG_BIN"/initdb -E utf8 -U cloud_admin -D "$DATA_DIR" --sysid="$SYSID" echo "port=$PORT" >> "$DATA_DIR"/postgresql.conf echo "shared_preload_libraries='\$libdir/neon_rmgr.so'" >> "$DATA_DIR"/postgresql.conf REDO_POS=0x$("$PG_BIN"/pg_controldata -D "$DATA_DIR" | grep -F "REDO location"| cut -c 42-) diff --git a/libs/walproposer/build.rs b/libs/walproposer/build.rs index fd09030dbd..e858b2d0f6 100644 --- a/libs/walproposer/build.rs +++ b/libs/walproposer/build.rs @@ -29,6 +29,9 @@ fn main() -> anyhow::Result<()> { let pgxn_neon = std::fs::canonicalize(pgxn_neon)?; let pgxn_neon = pgxn_neon.to_str().ok_or(anyhow!("Bad non-UTF path"))?; + println!("cargo:rustc-link-arg=-fsanitize=address"); + println!("cargo:rustc-link-arg=-fsanitize=undefined"); + println!("cargo:rustc-link-arg=-static-libsan"); println!("cargo:rustc-link-lib=static=pgport"); println!("cargo:rustc-link-lib=static=pgcommon"); println!("cargo:rustc-link-lib=static=walproposer"); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index ce99569beb..47e69c336e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3755,6 +3755,14 @@ async fn run_initdb( .env_clear() .env("LD_LIBRARY_PATH", &initdb_lib_dir) .env("DYLD_LIBRARY_PATH", &initdb_lib_dir) + .env( + "ASAN_OPTIONS", + std::env::var("ASAN_OPTIONS").unwrap_or_default(), + ) + .env( + "UBSAN_OPTIONS", + std::env::var("UBSAN_OPTIONS").unwrap_or_default(), + ) .stdout(Stdio::piped()) .stderr(Stdio::piped()) // If the `select!` below doesn't finish the `wait_with_output`, diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 21db666caa..0789c0912e 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -405,7 +405,7 @@ lfc_cache_contains(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno) if (LFC_ENABLED()) { entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); - found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) != 0; + found = entry != NULL && (entry->bitmap[chunk_offs >> 5] & ((uint32)1 << (chunk_offs & 31))) != 0; } LWLockRelease(lfc_lock); return found; @@ -450,7 +450,7 @@ lfc_evict(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno) } /* remove the page from the cache */ - entry->bitmap[chunk_offs >> 5] &= ~(1 << (chunk_offs & (32 - 1))); + entry->bitmap[chunk_offs >> 5] &= ~((uint32)1 << (chunk_offs & (32 - 1))); /* * If the chunk has no live entries, we can position the chunk to be @@ -526,7 +526,7 @@ lfc_read(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, } entry = hash_search_with_hash_value(lfc_hash, &tag, hash, HASH_FIND, NULL); - if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & (1 << (chunk_offs & 31))) == 0) + if (entry == NULL || (entry->bitmap[chunk_offs >> 5] & ((uint32)1 << (chunk_offs & 31))) == 0) { /* Page is not cached */ lfc_ctl->misses += 1; @@ -678,7 +678,7 @@ lfc_write(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, const void if (--entry->access_count == 0) dlist_push_tail(&lfc_ctl->lru, &entry->lru_node); - entry->bitmap[chunk_offs >> 5] |= (1 << (chunk_offs & 31)); + entry->bitmap[chunk_offs >> 5] |= ((uint32)1 << (chunk_offs & 31)); } LWLockRelease(lfc_lock); @@ -913,7 +913,7 @@ local_cache_pages(PG_FUNCTION_ARGS) { for (int i = 0; i < BLOCKS_PER_CHUNK; i++) { - if (entry->bitmap[i >> 5] & (1 << (i & 31))) + if (entry->bitmap[i >> 5] & ((uint32)1 << (i & 31))) { fctx->record[n].pageoffs = entry->offset * BLOCKS_PER_CHUNK + i; fctx->record[n].relfilenode = NInfoGetRelNumber(BufTagGetNRelFileInfo(entry->key)); diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 171af7d2aa..3075d74881 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -993,7 +993,8 @@ DetermineEpochStartLsn(WalProposer *wp) dth = &wp->safekeeper[wp->donor].voteResponse.termHistory; wp->propTermHistory.n_entries = dth->n_entries + 1; wp->propTermHistory.entries = palloc(sizeof(TermSwitchEntry) * wp->propTermHistory.n_entries); - memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries); + if (dth->n_entries > 0) + memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries); wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].term = wp->propTerm; wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propEpochStartLsn; diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index f26d04e2f3..d79103213a 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -20,7 +20,9 @@ def test_pg_regress( env.neon_cli.create_branch("test_pg_regress", "empty") # Connect to postgres and create a database called "regression". - endpoint = env.endpoints.create_start("test_pg_regress") + endpoint = env.endpoints.create_start( + "test_pg_regress", config_lines=["max_stack_depth = 4096kB"] + ) # stack should be increased for tests to pass with asan endpoint.safe_psql("CREATE DATABASE regression") # Create some local directories for pg_regress to run in. @@ -75,7 +77,7 @@ def test_isolation( # Connect to postgres and create a database called "regression". # isolation tests use prepared transactions, so enable them endpoint = env.endpoints.create_start( - "test_isolation", config_lines=["max_prepared_transactions=100"] + "test_isolation", config_lines=["max_prepared_transactions=100", "max_stack_depth = 4096kB"] ) endpoint.safe_psql("CREATE DATABASE isolation_regression") diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 7be4a52d72..2088022bc7 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 7be4a52d728459b79b59343c57d338c3073059c8 +Subproject commit 2088022bc73e9d3d08b2817fbcffa72412e3bc7a diff --git a/vendor/revisions.json b/vendor/revisions.json index 3d626cb2bc..1eb9acd109 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,5 +1,5 @@ { - "postgres-v16": "7be4a52d728459b79b59343c57d338c3073059c8", - "postgres-v15": "c1c2272f436ed9231f6172f49de219fe71a9280d", - "postgres-v14": "82072911287cabb32018cf92c8425fa1c744def4" + "postgres-v16": "2088022bc73e9d3d08b2817fbcffa72412e3bc7a", + "postgres-v15": "a2dc225ddfc8cae1849aa2316f435c58f0333d8c", + "postgres-v14": "03358bb0b5e0d33c238710139e768db9e75cfcc8" }