Clean up

use template0copy, should work now
2026-05-20 22:50:38 +00:00 · 2022-07-04 20:11:08 -04:00 · 2022-07-04 19:21:32 -04:00 · 2022-07-04 16:41:52 -04:00 · 2022-07-04 13:21:25 -04:00 · 2022-07-04 13:09:50 -04:00
12 changed files with 389 additions and 203 deletions
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -100,10 +100,8 @@ jobs:
          name: Rust build << parameters.build_type >>
          command: |
            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
              CARGO_FLAGS="--release --features profiling"
            fi

@@ -112,7 +110,7 @@ jobs:
            export RUSTC_WRAPPER=cachepot
            export AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}"
            export AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}"
-            "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
+            mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
            cachepot -s

      - save_cache:
@@ -128,32 +126,24 @@ jobs:
          name: cargo test
          command: |
            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
              CARGO_FLAGS=
            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
              CARGO_FLAGS=--release
            fi

-            "${cov_prefix[@]}" cargo test $CARGO_FLAGS
+            cargo test $CARGO_FLAGS

        # Install the rust binaries, for use by test jobs
      - run:
          name: Install rust binaries
          command: |
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
-            fi
-
            binaries=$(
-              "${cov_prefix[@]}" cargo metadata --format-version=1 --no-deps |
+              cargo metadata --format-version=1 --no-deps |
              jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
            )

            test_exe_paths=$(
-              "${cov_prefix[@]}" cargo test --message-format=json --no-run |
+              cargo test --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )

@@ -166,34 +156,15 @@ jobs:
              SRC=target/$BUILD_TYPE/$bin
              DST=/tmp/zenith/bin/$bin
              cp $SRC $DST
-              echo $DST >> /tmp/zenith/etc/binaries.list
            done

-            # Install test executables (for code coverage)
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              for bin in $test_exe_paths; do
-                SRC=$bin
-                DST=/tmp/zenith/test_bin/$(basename $bin)
-                cp $SRC $DST
-                echo $DST >> /tmp/zenith/etc/binaries.list
-              done
-            fi
-
        # Install the postgres binaries, for use by test jobs
      - run:
          name: Install postgres binaries
          command: |
            cp -a tmp_install /tmp/zenith/pg_install

-      - run:
-          name: Merge coverage data
-          command: |
-            # This will speed up workspace uploads
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
-            fi
-
-        # Save the rust binaries and coverage data for other jobs in this workflow.
+      # Save rust binaries for other jobs in the workflow
      - persist_to_workspace:
          root: /tmp/zenith
          paths:
@@ -314,12 +285,6 @@ jobs:

            export GITHUB_SHA=$CIRCLE_SHA1

-            if [[ $BUILD_TYPE == "debug" ]]; then
-              cov_prefix=(scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage run)
-            elif [[ $BUILD_TYPE == "release" ]]; then
-              cov_prefix=()
-            fi
-
            # Run the tests.
            #
            # The junit.xml file allows CircleCI to display more fine-grained test information
@@ -330,7 +295,7 @@ jobs:
            # -n4 uses four processes to run tests via pytest-xdist
            # -s is not used to prevent pytest from capturing output, because tests are running
            # in parallel and logs are mixed between different tests
-            "${cov_prefix[@]}" ./scripts/pytest \
+            ./scripts/pytest \
              --junitxml=$TEST_OUTPUT/junit.xml \
              --tb=short \
              --verbose \
@@ -359,67 +324,12 @@ jobs:
      # The store_test_results step tells CircleCI where to find the junit.xml file.
      - store_test_results:
          path: /tmp/test_output
-      - run:
-          name: Merge coverage data
-          command: |
-            # This will speed up workspace uploads
-            if [[ $BUILD_TYPE == "debug" ]]; then
-              scripts/coverage "--profraw-prefix=$CIRCLE_JOB" --dir=/tmp/zenith/coverage merge
-            fi
-      # Save coverage data (if any)
+      # Save data (if any)
      - persist_to_workspace:
          root: /tmp/zenith
          paths:
            - "*"

-  coverage-report:
-    executor: neon-xlarge-executor
-    steps:
-      - attach_workspace:
-          at: /tmp/zenith
-      - checkout
-      - restore_cache:
-          name: Restore rust cache
-          keys:
-            # Require an exact match. While an out of date cache might speed up the build,
-            # there's no way to clean out old packages, so the cache grows every time something
-            # changes.
-            - v04-rust-cache-deps-debug-{{ checksum "Cargo.lock" }}
-      - run:
-          name: Build coverage report
-          command: |
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
-
-            scripts/coverage \
-              --dir=/tmp/zenith/coverage report \
-              --input-objects=/tmp/zenith/etc/binaries.list \
-              --commit-url=$COMMIT_URL \
-              --format=github
-      - run:
-          name: Upload coverage report
-          command: |
-            LOCAL_REPO=$CIRCLE_PROJECT_USERNAME/$CIRCLE_PROJECT_REPONAME
-            REPORT_URL=https://neondatabase.github.io/zenith-coverage-data/$CIRCLE_SHA1
-            COMMIT_URL=https://github.com/neondatabase/neon/commit/$CIRCLE_SHA1
-
-            scripts/git-upload \
-              --repo=https://$VIP_VAP_ACCESS_TOKEN@github.com/neondatabase/zenith-coverage-data.git \
-              --message="Add code coverage for $COMMIT_URL" \
-              copy /tmp/zenith/coverage/report $CIRCLE_SHA1 # COPY FROM TO_RELATIVE
-
-            # Add link to the coverage report to the commit
-            curl -f -X POST \
-            https://api.github.com/repos/$LOCAL_REPO/statuses/$CIRCLE_SHA1 \
-            -H "Accept: application/vnd.github.v3+json" \
-            --user "$CI_ACCESS_TOKEN" \
-            --data \
-              "{
-                \"state\": \"success\",
-                \"context\": \"zenith-coverage\",
-                \"description\": \"Coverage report is ready\",
-                \"target_url\": \"$REPORT_URL\"
-              }"
-
  # Build neondatabase/neon:latest image and push it to Docker hub
  docker-image:
    docker:
@@ -730,12 +640,6 @@ workflows:
          save_perf_report: true
          requires:
            - build-neon-release
-      - coverage-report:
-          # Context passes credentials for gh api
-          context: CI_ACCESS_TOKEN
-          requires:
-            # TODO: consider adding more
-            - other-tests-debug
      - docker-image:
          # Context gives an ability to login
          context: Docker Hub
--- a/.github/actions/run-python-test-set/action.yml
+++ b/.github/actions/run-python-test-set/action.yml
@@ -92,7 +92,7 @@ runs:
        fi

        if [[ "${{ inputs.build_type }}" == "debug" ]]; then
-          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
+          cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
        elif [[ "${{ inputs.build_type }}" == "release" ]]; then
          cov_prefix=()
        fi
--- a/.github/actions/save-coverage-data/action.yml
+++ b/.github/actions/save-coverage-data/action.yml
@@ -6,7 +6,7 @@ runs:
  steps:
    - name: Merge coverage data
      shell: bash -ex {0}
-      run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage/ merge
+      run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

    - name: Upload coverage data
      uses: actions/upload-artifact@v3
@@ -14,4 +14,4 @@ runs:
        retention-days: 7
        if-no-files-found: error
        name: coverage-data-artifact
-        path: /tmp/neon/coverage/
+        path: /tmp/coverage/
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -17,11 +17,6 @@ concurrency:
 env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'
-  AWS_ACCESS_KEY_ID: ${{ secrets.CACHEPOT_AWS_ACCESS_KEY_ID }}
-  AWS_SECRET_ACCESS_KEY: ${{ secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY }}
-  CACHEPOT_BUCKET: zenith-rust-cachepot
-  RUSTC_WRAPPER: cachepot
-

 jobs:
  build-postgres:
@@ -54,7 +49,7 @@ jobs:

      - name: Build postgres
        if: steps.cache_pg.outputs.cache-hit != 'true'
-        run: COPT='-Werror' mold -run make postgres -j$(nproc)
+        run: mold -run make postgres -j$(nproc)

      # actions/cache@v3 does not allow concurrently using the same cache across job steps, so use a separate cache
      - name: Prepare postgres artifact
@@ -106,12 +101,15 @@ jobs:
            ~/.cargo/registry/
            ~/.cargo/git/
            target/
-          key: v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
+          # Fall back to older versions of the key, if no cache for current Cargo.lock was found
+          key: |
+            v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-${{ hashFiles('Cargo.lock') }}
+            v2-${{ runner.os }}-${{ matrix.build_type }}-cargo-${{ matrix.rust_toolchain }}-

      - name: Run cargo build
        run: |
          if [[ $BUILD_TYPE == "debug" ]]; then
-            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
            CARGO_FLAGS=
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=()
@@ -119,12 +117,11 @@ jobs:
          fi

          "${cov_prefix[@]}" mold -run cargo build $CARGO_FLAGS --features failpoints --bins --tests
-          cachepot -s

      - name: Run cargo test
        run: |
          if [[ $BUILD_TYPE == "debug" ]]; then
-            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
            CARGO_FLAGS=
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=()
@@ -136,7 +133,7 @@ jobs:
      - name: Install rust binaries
        run: |
          if [[ $BUILD_TYPE == "debug" ]]; then
-            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/neon/coverage run)
+            cov_prefix=(scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage run)
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=()
          fi
@@ -154,7 +151,9 @@ jobs:
          mkdir -p /tmp/neon/bin/
          mkdir -p /tmp/neon/test_bin/
          mkdir -p /tmp/neon/etc/
-          mkdir -p /tmp/neon/coverage/
+
+          # Keep bloated coverage data files away from the rest of the artifact
+          mkdir -p /tmp/coverage/

          # Install target binaries
          for bin in $binaries; do
@@ -166,13 +165,13 @@ jobs:
          # Install test executables and write list of all binaries (for code coverage)
          if [[ $BUILD_TYPE == "debug" ]]; then
            for bin in $binaries; do
-              echo "/tmp/neon/bin/$bin" >> /tmp/neon/coverage/binaries.list
+              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
            done
            for bin in $test_exe_paths; do
              SRC=$bin
              DST=/tmp/neon/test_bin/$(basename $bin)
              cp "$SRC" "$DST"
-              echo "$DST" >> /tmp/neon/coverage/binaries.list
+              echo "$DST" >> /tmp/coverage/binaries.list
            done
          fi

@@ -316,7 +315,10 @@ jobs:
        uses: actions/download-artifact@v3
        with:
          name: coverage-data-artifact
-          path: /tmp/neon/coverage/
+          path: /tmp/coverage/
+
+      - name: Merge coverage data
+        run: scripts/coverage "--profraw-prefix=$GITHUB_JOB" --dir=/tmp/coverage merge

      - name: Build and upload coverage report
        run: |
@@ -325,8 +327,8 @@ jobs:
          COMMIT_URL=https://github.com/${{ github.repository }}/commit/$COMMIT_SHA

          scripts/coverage \
-            --dir=/tmp/neon/coverage report \
-            --input-objects=/tmp/neon/coverage/binaries.list \
+            --dir=/tmp/coverage report \
+            --input-objects=/tmp/coverage/binaries.list \
            --commit-url=$COMMIT_URL \
            --format=github

@@ -335,7 +337,7 @@ jobs:
          scripts/git-upload \
            --repo=https://${{ secrets.VIP_VAP_ACCESS_TOKEN }}@github.com/${{ github.repository_owner }}/zenith-coverage-data.git \
            --message="Add code coverage for $COMMIT_URL" \
-            copy /tmp/neon/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE
+            copy /tmp/coverage/report $COMMIT_SHA # COPY FROM TO_RELATIVE

          # Add link to the coverage report to the commit
          curl -f -X POST \
--- a/.github/workflows/codestyle.yml
+++ b/.github/workflows/codestyle.yml
@@ -26,7 +26,7 @@ jobs:
        # Rust toolchains (e.g. nightly or 1.37.0), add them here.
        rust_toolchain: [1.58]
        os: [ubuntu-latest, macos-latest]
-    timeout-minutes: 30
+    timeout-minutes: 50
    name: run regression test suite
    runs-on: ${{ matrix.os }}

--- a/Cargo.lock
+++ b/Cargo.lock
@@ -650,7 +650,7 @@ dependencies = [
 "crossterm_winapi",
 "libc",
 "mio",
- "parking_lot 0.12.1",
+ "parking_lot 0.12.0",
 "signal-hook",
 "signal-hook-mio",
 "winapi",
@@ -1899,7 +1899,6 @@ dependencies = [
 "metrics",
 "nix",
 "once_cell",
- "parking_lot 0.12.1",
 "postgres",
 "postgres-protocol",
 "postgres-types",
@@ -1940,9 +1939,9 @@ dependencies = [

 [[package]]
 name = "parking_lot"
-version = "0.12.1"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
+checksum = "87f5ec2493a61ac0506c0f4199f99070cbe83857b0337006a30f3e6719b8ef58"
 dependencies = [
 "lock_api",
 "parking_lot_core 0.9.2",
@@ -2309,7 +2308,7 @@ dependencies = [
 "lazy_static",
 "md5",
 "metrics",
- "parking_lot 0.12.1",
+ "parking_lot 0.12.0",
 "pin-project-lite",
 "rand",
 "rcgen",
@@ -3357,7 +3356,7 @@ dependencies = [
 "fallible-iterator",
 "futures",
 "log",
- "parking_lot 0.12.1",
+ "parking_lot 0.12.0",
 "percent-encoding",
 "phf",
 "pin-project-lite",
--- a/pageserver/Cargo.toml
+++ b/pageserver/Cargo.toml
@@ -63,8 +63,6 @@ workspace_hack = { version = "0.1", path = "../workspace_hack" }
 close_fds = "0.3.2"
 walkdir = "2.3.2"

-parking_lot = "0.12.1"
-
 [dev-dependencies]
 hex-literal = "0.3"
 tempfile = "3.2"
--- a/pageserver/src/page_cache.rs
+++ b/pageserver/src/page_cache.rs
@@ -36,12 +36,13 @@
 //! mapping is automatically removed and the slot is marked free.
 //!

-use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
-
 use std::{
    collections::{hash_map::Entry, HashMap},
    convert::TryInto,
-    sync::atomic::{AtomicU8, AtomicUsize, Ordering},
+    sync::{
+        atomic::{AtomicU8, AtomicUsize, Ordering},
+        RwLock, RwLockReadGuard, RwLockWriteGuard, TryLockError,
+    },
 };

 use once_cell::sync::OnceCell;
@@ -384,7 +385,7 @@ impl PageCache {
        for slot_idx in 0..self.slots.len() {
            let slot = &self.slots[slot_idx];

-            let mut inner = slot.inner.write();
+            let mut inner = slot.inner.write().unwrap();
            if let Some(key) = &inner.key {
                match key {
                    CacheKey::EphemeralPage { file_id, blkno: _ } if *file_id == drop_file_id => {
@@ -412,7 +413,7 @@ impl PageCache {
        for slot_idx in 0..self.slots.len() {
            let slot = &self.slots[slot_idx];

-            let mut inner = slot.inner.write();
+            let mut inner = slot.inner.write().unwrap();
            if let Some(key) = &inner.key {
                match key {
                    CacheKey::ImmutableFilePage { file_id, blkno: _ }
@@ -453,7 +454,7 @@ impl PageCache {
            // that it's still what we expected (because we released the mapping
            // lock already, another thread could have evicted the page)
            let slot = &self.slots[slot_idx];
-            let inner = slot.inner.read();
+            let inner = slot.inner.read().unwrap();
            if inner.key.as_ref() == Some(cache_key) {
                slot.inc_usage_count();
                return Some(PageReadGuard(inner));
@@ -542,7 +543,7 @@ impl PageCache {
            // that it's still what we expected (because we don't released the mapping
            // lock already, another thread could have evicted the page)
            let slot = &self.slots[slot_idx];
-            let inner = slot.inner.write();
+            let inner = slot.inner.write().unwrap();
            if inner.key.as_ref() == Some(cache_key) {
                slot.inc_usage_count();
                return Some(PageWriteGuard { inner, valid: true });
@@ -610,7 +611,7 @@ impl PageCache {
    fn search_mapping(&self, cache_key: &mut CacheKey) -> Option<usize> {
        match cache_key {
            CacheKey::MaterializedPage { hash_key, lsn } => {
-                let map = self.materialized_page_map.read();
+                let map = self.materialized_page_map.read().unwrap();
                let versions = map.get(hash_key)?;

                let version_idx = match versions.binary_search_by_key(lsn, |v| v.lsn) {
@@ -623,11 +624,11 @@ impl PageCache {
                Some(version.slot_idx)
            }
            CacheKey::EphemeralPage { file_id, blkno } => {
-                let map = self.ephemeral_page_map.read();
+                let map = self.ephemeral_page_map.read().unwrap();
                Some(*map.get(&(*file_id, *blkno))?)
            }
            CacheKey::ImmutableFilePage { file_id, blkno } => {
-                let map = self.immutable_page_map.read();
+                let map = self.immutable_page_map.read().unwrap();
                Some(*map.get(&(*file_id, *blkno))?)
            }
        }
@@ -640,7 +641,7 @@ impl PageCache {
    fn search_mapping_for_write(&self, key: &CacheKey) -> Option<usize> {
        match key {
            CacheKey::MaterializedPage { hash_key, lsn } => {
-                let map = self.materialized_page_map.read();
+                let map = self.materialized_page_map.read().unwrap();
                let versions = map.get(hash_key)?;

                if let Ok(version_idx) = versions.binary_search_by_key(lsn, |v| v.lsn) {
@@ -650,11 +651,11 @@ impl PageCache {
                }
            }
            CacheKey::EphemeralPage { file_id, blkno } => {
-                let map = self.ephemeral_page_map.read();
+                let map = self.ephemeral_page_map.read().unwrap();
                Some(*map.get(&(*file_id, *blkno))?)
            }
            CacheKey::ImmutableFilePage { file_id, blkno } => {
-                let map = self.immutable_page_map.read();
+                let map = self.immutable_page_map.read().unwrap();
                Some(*map.get(&(*file_id, *blkno))?)
            }
        }
@@ -669,7 +670,7 @@ impl PageCache {
                hash_key: old_hash_key,
                lsn: old_lsn,
            } => {
-                let mut map = self.materialized_page_map.write();
+                let mut map = self.materialized_page_map.write().unwrap();
                if let Entry::Occupied(mut old_entry) = map.entry(old_hash_key.clone()) {
                    let versions = old_entry.get_mut();

@@ -684,12 +685,12 @@ impl PageCache {
                }
            }
            CacheKey::EphemeralPage { file_id, blkno } => {
-                let mut map = self.ephemeral_page_map.write();
+                let mut map = self.ephemeral_page_map.write().unwrap();
                map.remove(&(*file_id, *blkno))
                    .expect("could not find old key in mapping");
            }
            CacheKey::ImmutableFilePage { file_id, blkno } => {
-                let mut map = self.immutable_page_map.write();
+                let mut map = self.immutable_page_map.write().unwrap();
                map.remove(&(*file_id, *blkno))
                    .expect("could not find old key in mapping");
            }
@@ -707,7 +708,7 @@ impl PageCache {
                hash_key: new_key,
                lsn: new_lsn,
            } => {
-                let mut map = self.materialized_page_map.write();
+                let mut map = self.materialized_page_map.write().unwrap();
                let versions = map.entry(new_key.clone()).or_default();
                match versions.binary_search_by_key(new_lsn, |v| v.lsn) {
                    Ok(version_idx) => Some(versions[version_idx].slot_idx),
@@ -724,7 +725,7 @@ impl PageCache {
                }
            }
            CacheKey::EphemeralPage { file_id, blkno } => {
-                let mut map = self.ephemeral_page_map.write();
+                let mut map = self.ephemeral_page_map.write().unwrap();
                match map.entry((*file_id, *blkno)) {
                    Entry::Occupied(entry) => Some(*entry.get()),
                    Entry::Vacant(entry) => {
@@ -734,7 +735,7 @@ impl PageCache {
                }
            }
            CacheKey::ImmutableFilePage { file_id, blkno } => {
-                let mut map = self.immutable_page_map.write();
+                let mut map = self.immutable_page_map.write().unwrap();
                match map.entry((*file_id, *blkno)) {
                    Entry::Occupied(entry) => Some(*entry.get()),
                    Entry::Vacant(entry) => {
@@ -764,8 +765,11 @@ impl PageCache {

            if slot.dec_usage_count() == 0 {
                let mut inner = match slot.inner.try_write() {
-                    Some(inner) => inner,
-                    None => {
+                    Ok(inner) => inner,
+                    Err(TryLockError::Poisoned(err)) => {
+                        panic!("buffer lock was poisoned: {:?}", err)
+                    }
+                    Err(TryLockError::WouldBlock) => {
                        // If we have looped through the whole buffer pool 10 times
                        // and still haven't found a victim buffer, something's wrong.
                        // Maybe all the buffers were in locked. That could happen in
--- a/pageserver/src/tenant_config.rs
+++ b/pageserver/src/tenant_config.rs
@@ -37,7 +37,7 @@ pub mod defaults {
    pub const DEFAULT_PITR_INTERVAL: &str = "30 days";
    pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "2 seconds";
    pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds";
-    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10_000;
+    pub const DEFAULT_MAX_WALRECEIVER_LSN_WAL_LAG: u64 = 10 * 1024 * 1024;
 }

 /// Per-tenant configuration options
--- a/pageserver/src/walreceiver.rs
+++ b/pageserver/src/walreceiver.rs
@@ -178,7 +178,7 @@ async fn shutdown_all_wal_connections(
 /// That may lead to certain events not being observed by the listener.
 #[derive(Debug)]
 struct TaskHandle<E> {
-    handle: JoinHandle<()>,
+    handle: JoinHandle<Result<(), String>>,
    events_receiver: watch::Receiver<TaskEvent<E>>,
    cancellation: watch::Sender<()>,
 }
@@ -205,8 +205,8 @@ impl<E: Clone> TaskHandle<E> {

        let sender = Arc::clone(&events_sender);
        let handle = tokio::task::spawn(async move {
-            let task_result = task(sender, cancellation_receiver).await;
-            events_sender.send(TaskEvent::End(task_result)).ok();
+            events_sender.send(TaskEvent::Started).ok();
+            task(sender, cancellation_receiver).await
        });

        TaskHandle {
@@ -216,6 +216,16 @@ impl<E: Clone> TaskHandle<E> {
        }
    }

+    async fn next_task_event(&mut self) -> TaskEvent<E> {
+        select! {
+            next_task_event = self.events_receiver.changed() => match next_task_event {
+                Ok(()) => self.events_receiver.borrow().clone(),
+                Err(_task_channel_part_dropped) => join_on_handle(&mut self.handle).await,
+            },
+            task_completion_result = join_on_handle(&mut self.handle) => task_completion_result,
+        }
+    }
+
    /// Aborts current task, waiting for it to finish.
    async fn shutdown(self) {
        self.cancellation.send(()).ok();
@@ -225,6 +235,19 @@ impl<E: Clone> TaskHandle<E> {
    }
 }

+async fn join_on_handle<E>(handle: &mut JoinHandle<Result<(), String>>) -> TaskEvent<E> {
+    match handle.await {
+        Ok(task_result) => TaskEvent::End(task_result),
+        Err(e) => {
+            if e.is_cancelled() {
+                TaskEvent::End(Ok(()))
+            } else {
+                TaskEvent::End(Err(format!("WAL receiver task panicked: {e}")))
+            }
+        }
+    }
+}
+
 /// A step to process timeline attach/detach events to enable/disable the corresponding WAL receiver machinery.
 /// In addition to WAL streaming management, the step ensures that corresponding tenant has its service threads enabled or disabled.
 /// This is done here, since only walreceiver knows when a certain tenant has no streaming enabled.
--- a/pageserver/src/walreceiver/connection_manager.rs
+++ b/pageserver/src/walreceiver/connection_manager.rs
@@ -104,49 +104,29 @@ async fn connection_manager_loop_step(

            Some(wal_connection_update) = async {
                match walreceiver_state.wal_connection.as_mut() {
-                    Some(wal_connection) => {
-                        let receiver = &mut wal_connection.connection_task.events_receiver;
-                        Some(match receiver.changed().await {
-                            Ok(()) => receiver.borrow().clone(),
-                            Err(_cancellation_error) => TaskEvent::End(Ok(())),
-                        })
-                    }
+                    Some(wal_connection) => Some(wal_connection.connection_task.next_task_event().await),
                    None => None,
                }
            } => {
-                let (connection_update, reset_connection_attempts) = match &wal_connection_update {
-                    TaskEvent::Started => (Some(Utc::now().naive_utc()), true),
-                    TaskEvent::NewEvent(replication_feedback) => (Some(DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc()), true),
+                let wal_connection = walreceiver_state.wal_connection.as_mut().expect("Should have a connection, as checked by the corresponding select! guard");
+                match &wal_connection_update {
+                    TaskEvent::Started => {
+                        wal_connection.latest_connection_update = Utc::now().naive_utc();
+                        *walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0) += 1;
+                    },
+                    TaskEvent::NewEvent(replication_feedback) => {
+                        wal_connection.latest_connection_update = DateTime::<Local>::from(replication_feedback.ps_replytime).naive_utc();
+                        // reset connection attempts here only, the only place where both nodes
+                        // explicitly confirm with replication feedback that they are connected to each other
+                        walreceiver_state.wal_connection_attempts.remove(&wal_connection.sk_id);
+                    },
                    TaskEvent::End(end_result) => {
-                        let should_reset_connection_attempts = match end_result {
-                            Ok(()) => {
-                                debug!("WAL receiving task finished");
-                                true
-                            },
-                            Err(e) => {
-                                warn!("WAL receiving task failed: {e}");
-                                false
-                            },
+                        match end_result {
+                            Ok(()) => debug!("WAL receiving task finished"),
+                            Err(e) => warn!("WAL receiving task failed: {e}"),
                        };
                        walreceiver_state.wal_connection = None;
-                        (None, should_reset_connection_attempts)
                    },
-                };
-
-                if let Some(connection_update) = connection_update {
-                    match &mut walreceiver_state.wal_connection {
-                        Some(wal_connection) => {
-                            wal_connection.latest_connection_update = connection_update;
-
-                            let attempts_entry = walreceiver_state.wal_connection_attempts.entry(wal_connection.sk_id).or_insert(0);
-                            if reset_connection_attempts {
-                                *attempts_entry = 0;
-                            } else {
-                                *attempts_entry += 1;
-                            }
-                        },
-                        None => error!("Received connection update for WAL connection that is not active, update: {wal_connection_update:?}"),
-                    }
                }
            },

@@ -406,10 +386,8 @@ impl WalreceiverState {
            Some(existing_wal_connection) => {
                let connected_sk_node = existing_wal_connection.sk_id;

-                let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) = self
-                    .applicable_connection_candidates()
-                    .filter(|&(sk_id, _, _)| sk_id != connected_sk_node)
-                    .max_by_key(|(_, info, _)| info.commit_lsn)?;
+                let (new_sk_id, new_safekeeper_etcd_data, new_wal_producer_connstr) =
+                    self.select_connection_candidate(Some(connected_sk_node))?;

                let now = Utc::now().naive_utc();
                if let Ok(latest_interaciton) =
@@ -462,9 +440,8 @@ impl WalreceiverState {
                }
            }
            None => {
-                let (new_sk_id, _, new_wal_producer_connstr) = self
-                    .applicable_connection_candidates()
-                    .max_by_key(|(_, info, _)| info.commit_lsn)?;
+                let (new_sk_id, _, new_wal_producer_connstr) =
+                    self.select_connection_candidate(None)?;
                return Some(NewWalConnectionCandidate {
                    safekeeper_id: new_sk_id,
                    wal_producer_connstr: new_wal_producer_connstr,
@@ -476,6 +453,49 @@ impl WalreceiverState {
        None
    }

+    /// Picks the safekeeper to connect to next, using the timeline data collected from etcd updates.
+    /// If `node_to_omit` is set, that node is excluded from the selection, which lets the caller
+    /// switch gracefully from a currently connected safekeeper to a different one.
+    ///
+    /// The selection is done in two steps:
+    /// * only the candidates with the fewest recorded connection attempts are kept
+    ///   (a node without a recorded value counts as having 0 attempts)
+    /// * among those, the candidate with the greatest `commit_lsn` is returned
+    ///
+    /// Returns `None` if no applicable candidate remains.
+    ///
+    /// NOTE:
+    /// A node's attempts counter goes away when its etcd timeline data is evicted by age; apart from that,
+    /// the counter has to be reset for the node to compete again with nodes that have fewer attempts.
+    /// Until then, a safekeeper with a more advanced state but more recorded attempts is ignored in favor
+    /// of the nodes with fewer attempts.
+    fn select_connection_candidate(
+        &self,
+        node_to_omit: Option<NodeId>,
+    ) -> Option<(NodeId, &SkTimelineInfo, String)> {
+        // Number of connection attempts recorded for a node, 0 if nothing is recorded.
+        let attempts_of = |sk_id: &NodeId| {
+            self.wal_connection_attempts
+                .get(sk_id)
+                .copied()
+                .unwrap_or(0)
+        };
+
+        let candidates: Vec<_> = self
+            .applicable_connection_candidates()
+            .filter(|&(sk_id, _, _)| Some(sk_id) != node_to_omit)
+            .collect();
+
+        // `min()` yields `None` for an empty candidate list, which ends the selection early.
+        let fewest_attempts = candidates
+            .iter()
+            .map(|(sk_id, _, _)| attempts_of(sk_id))
+            .min()?;
+
+        candidates
+            .into_iter()
+            .filter(|(sk_id, _, _)| attempts_of(sk_id) <= fewest_attempts)
+            .max_by_key(|(_, info, _)| info.commit_lsn)
+    }
+
    fn applicable_connection_candidates(
        &self,
    ) -> impl Iterator<Item = (NodeId, &SkTimelineInfo, String)> {
@@ -500,15 +520,25 @@ impl WalreceiverState {
    }

    fn cleanup_old_candidates(&mut self) {
-        self.wal_stream_candidates.retain(|_, etcd_info| {
+        let mut node_ids_to_remove = Vec::with_capacity(self.wal_stream_candidates.len());
+
+        self.wal_stream_candidates.retain(|node_id, etcd_info| {
            if let Ok(time_since_latest_etcd_update) =
                (Utc::now().naive_utc() - etcd_info.latest_update).to_std()
            {
-                time_since_latest_etcd_update < self.lagging_wal_timeout
+                let should_retain = time_since_latest_etcd_update < self.lagging_wal_timeout;
+                if !should_retain {
+                    node_ids_to_remove.push(*node_id);
+                }
+                should_retain
            } else {
                true
            }
        });
+
+        for node_id in node_ids_to_remove {
+            self.wal_connection_attempts.remove(&node_id);
+        }
    }
 }

@@ -843,6 +873,64 @@ mod tests {
        Ok(())
    }

+    /// With no active connection and two candidates, the node with fewer recorded
+    /// connection attempts must be selected even though the other node has a bigger `commit_lsn`.
+    #[tokio::test]
+    async fn candidate_with_many_connection_failures() -> anyhow::Result<()> {
+        let harness = RepoHarness::create("candidate_with_many_connection_failures")?;
+        let mut state = dummy_state(&harness);
+        let now = Utc::now().naive_utc();
+
+        let current_lsn = Lsn(100_000).align();
+        let bigger_lsn = Lsn(current_lsn.0 + 100).align();
+
+        // Both candidates differ only in their commit_lsn, so build them from one template.
+        let sk_timeline_with_commit_lsn = |commit_lsn: Lsn| EtcdSkTimeline {
+            timeline: SkTimelineInfo {
+                last_log_term: None,
+                flush_lsn: None,
+                commit_lsn: Some(commit_lsn),
+                backup_lsn: None,
+                remote_consistent_lsn: None,
+                peer_horizon_lsn: None,
+                safekeeper_connstr: Some(DUMMY_SAFEKEEPER_CONNSTR.to_string()),
+            },
+            etcd_version: 0,
+            latest_update: now,
+        };
+
+        state.wal_connection = None;
+        state.wal_stream_candidates = HashMap::from([
+            (NodeId(0), sk_timeline_with_commit_lsn(bigger_lsn)),
+            (NodeId(1), sk_timeline_with_commit_lsn(current_lsn)),
+        ]);
+        // The more advanced node (0) has already one attempt recorded, the lagging one (1) has none.
+        state.wal_connection_attempts = HashMap::from([(NodeId(0), 1), (NodeId(1), 0)]);
+
+        let selected = state
+            .next_connection_candidate()
+            .expect("Expected one candidate selected, but got none");
+        assert_eq!(
+            selected.safekeeper_id,
+            NodeId(1),
+            "Should select the node with less connection errors"
+        );
+
+        Ok(())
+    }
+
    #[tokio::test]
    async fn connection_no_etcd_data_candidate() -> anyhow::Result<()> {
        let harness = RepoHarness::create("connection_no_etcd_data_candidate")?;
--- a/test_runner/batch_others/test_complete_basebackup.py
+++ b/test_runner/batch_others/test_complete_basebackup.py
@@ -0,0 +1,168 @@
+from fixtures.neon_fixtures import VanillaPostgres
+from fixtures.utils import subprocess_capture
+import os
+import shutil
+from pathlib import Path
+import tempfile
+
+
+def get_rel_paths(log_dir, pg_bin, base_tar, port="55439"):
+    """Yield the relation file paths found in the databases of a base backup.
+
+    The tar is unpacked into a temporary directory and a vanilla postgres is
+    started on top of it. Every database except template0 is then asked for
+    `pg_relation_filepath` of each `pg_class` row. template0 cannot be
+    connected to, so a copy of it (`template0copy`) is created and the paths
+    of the copy are rewritten to point into template0's directory.
+
+    This is a generator: the temporary directory and the postgres instance
+    stay alive until the generator is exhausted or closed.
+
+    Args:
+        log_dir: directory passed to `subprocess_capture` for command output.
+        pg_bin: postgres binaries fixture handed to `VanillaPostgres`.
+        base_tar: path of the base backup tar to inspect.
+        port: port for the temporary postgres; the default is only
+            "probably free", pass another one if it clashes.
+
+    Raises:
+        AssertionError: if a `template0copy` relation path is neither under
+            the database's own `base/<oid>/` directory nor under `global`.
+    """
+    with tempfile.TemporaryDirectory() as restored_dir:
+        # Unpack the base tar
+        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", restored_dir])
+
+        with VanillaPostgres(restored_dir, pg_bin, port, init=False) as vanilla_pg:
+            vanilla_pg.configure([f"port={port}"])
+            vanilla_pg.start()
+
+            # Create database based on template0 because we can't connect to template0
+            query = "create database template0copy template template0"
+            vanilla_pg.safe_psql(query, user="cloud_admin")
+            vanilla_pg.safe_psql("CHECKPOINT", user="cloud_admin")
+
+            # Get all databases
+            query = "select oid, datname from pg_database"
+            oid_dbname_pairs = vanilla_pg.safe_psql(query, user="cloud_admin")
+            template0_oid = [
+                oid
+                for (oid, database) in oid_dbname_pairs
+                if database == "template0"
+            ][0]
+
+            # Get rel paths for each database
+            for oid, database in oid_dbname_pairs:
+                if database == "template0":
+                    # We can't connect to template0
+                    continue
+
+                query = "select relname, pg_relation_filepath(oid) from pg_class"
+                result = vanilla_pg.safe_psql(query, user="cloud_admin", dbname=database)
+                for relname, filepath in result:
+                    if filepath is None:
+                        # No file path reported for this relation, nothing to yield
+                        continue
+
+                    if database != "template0copy":
+                        yield filepath
+                        continue
+
+                    # Add all template0copy paths to template0
+                    prefix = f"base/{oid}/"
+                    if filepath.startswith(prefix):
+                        suffix = filepath[len(prefix):]
+                        yield f"base/{template0_oid}/{suffix}"
+                    elif filepath.startswith("global"):
+                        print(f"skipping {database} global file {filepath}")
+                    else:
+                        raise AssertionError(
+                            f"unexpected file path {filepath} for relation {relname} "
+                            f"in database {database}")
+
+
+def pack_base(log_dir, restored_dir, output_tar):
+    """Tar up every top-level entry of `restored_dir` and move the archive to `output_tar`.
+
+    The archive is first written as `tmp.tar` inside `restored_dir` (tar runs
+    with that directory as its cwd, so member names are relative) and only
+    then moved to its final location.
+    """
+    archive_name = "tmp.tar"
+    # List the members before tar creates the archive file in the same directory
+    members = os.listdir(restored_dir)
+    subprocess_capture(log_dir, ["tar", "-cf", archive_name, *members], cwd=restored_dir)
+    shutil.move(os.path.join(restored_dir, archive_name), output_tar)
+
+
+def get_files_in_tar(log_dir, tar):
+    """Yield the path of every file contained in `tar`, relative to the archive root.
+
+    The archive is unpacked into a temporary directory, which stays alive
+    until the generator is exhausted or closed. Directories themselves are
+    not reported, only the files found by `os.walk`.
+    """
+    with tempfile.TemporaryDirectory() as restored_dir:
+        # Unpack the tar
+        subprocess_capture(log_dir, ["tar", "-xf", tar, "-C", restored_dir])
+
+        # Report every unpacked file relative to the unpack directory
+        for root, _dirs, files in os.walk(restored_dir):
+            for name in files:
+                yield os.path.relpath(os.path.join(root, name), restored_dir)
+
+
+def corrupt(log_dir, base_tar, output_tar):
+    """Write a copy of `base_tar` without its zero-length files to `output_tar`.
+
+    Returns the set of removed file paths, relative to the archive root.
+    """
+    with tempfile.TemporaryDirectory() as unpacked_dir:
+        # Unpack the base tar
+        subprocess_capture(log_dir, ["tar", "-xf", base_tar, "-C", unpacked_dir])
+
+        # Collect the zero-length files first, nothing is deleted during the walk
+        zero_length_files = [
+            candidate
+            for parent, _subdirs, names in os.walk(unpacked_dir)
+            for candidate in (os.path.join(parent, name) for name in names)
+            if os.path.getsize(candidate) == 0
+        ]
+
+        # Drop them (just to see if they get recreated)
+        for victim in zero_length_files:
+            os.remove(victim)
+
+        # Repackage what is left
+        pack_base(log_dir, unpacked_dir, output_tar)
+
+        # Strip "<unpacked_dir>/" to get archive-relative paths
+        prefix_len = len(unpacked_dir) + 1
+        return {victim[prefix_len:] for victim in zero_length_files}
+
+
+def touch_missing_rels(log_dir, corrupt_tar, output_tar, paths):
+    """Write a copy of `corrupt_tar` to `output_tar` with missing files recreated empty.
+
+    Args:
+        log_dir: directory passed to `subprocess_capture` for command output.
+        corrupt_tar: path of the tar to complete; it is not modified.
+        output_tar: path where the completed tar is written.
+        paths: archive-relative file paths that must exist in the result;
+            those absent from `corrupt_tar` are created as empty files.
+    """
+    with tempfile.TemporaryDirectory() as restored_dir:
+        # Unpack the base tar
+        subprocess_capture(log_dir, ["tar", "-xf", corrupt_tar, "-C", restored_dir])
+
+        # Touch files that don't exist
+        for path in paths:
+            absolute_path = os.path.join(restored_dir, path)
+            if not os.path.exists(absolute_path):
+                # f-string, so that the actual path is reported instead of the placeholder
+                print(f"File {absolute_path} didn't exist. Creating..")
+                Path(absolute_path).touch()
+
+        # Repackage
+        pack_base(log_dir, restored_dir, output_tar)
+
+
+def test_complete(test_output_dir, pg_bin):
+    """Round trip: drop the empty files from a base tar, then check they can be inferred and recreated."""
+
+    def files_of(tar):
+        return set(get_files_in_tar(test_output_dir, tar))
+
+    # Files that are allowed to stay missing
+    tolerated_missing = {
+        "postgresql.auto.conf",
+        "pg_ident.conf",
+    }
+
+    # Specify directories
+    # TODO make a basebackup instead of using one from another test
+    # NOTE(review): absolute path into a developer's home directory - this
+    # presumably only works on that machine; confirm before relying on this test.
+    work_dir = "/home/bojan/src/neondatabase/neon/test_output/test_import_from_pageserver/"
+    base_tar = os.path.join(work_dir, "psql_2.stdout")
+    output_tar = os.path.join(work_dir, "psql_2-completed.stdout")
+
+    # Create new base tar with missing empty files, make sure something was actually removed
+    corrupt_tar = os.path.join(test_output_dir, "psql_2-corrupted.stdout")
+    deleted_files = corrupt(test_output_dir, base_tar, corrupt_tar)
+    assert files_of(base_tar) - files_of(corrupt_tar)
+
+    # Reconstruct paths from the corrupted tar, assert it covers everything important
+    reconstructed_paths = set(get_rel_paths(test_output_dir, pg_bin, corrupt_tar))
+    assert deleted_files - reconstructed_paths <= tolerated_missing
+
+    # Recreate the correct tar by touching files, compare with original tar
+    touch_missing_rels(test_output_dir, corrupt_tar, output_tar, reconstructed_paths)
+    assert files_of(base_tar) - files_of(output_tar) <= tolerated_missing
+
+
+# HACK this script relies on test fixtures, but you can run it with
+# poetry run pytest -k test_main_hack and pass inputs via envvars
+#
+# The script takes a base tar, infers what empty rel files might be missing
+# and creates a new base tar with those files included. It does not modify
+# the original file.
+def test_main_hack(test_output_dir, pg_bin):
+    """Complete the tar named by INPUT_BASE_TAR and write the result to OUTPUT_BASE_TAR."""
+    # Read both envvars up front, so a missing one fails before any work is done
+    env = os.environ
+    input_tar = env["INPUT_BASE_TAR"]
+    completed_tar = env["OUTPUT_BASE_TAR"]
+
+    inferred_paths = set(get_rel_paths(test_output_dir, pg_bin, input_tar))
+    touch_missing_rels(test_output_dir, input_tar, completed_tar, inferred_paths)
Author	SHA1	Message	Date
Bojan Serafimov	98bf3e7136	Clean up	2022-07-04 20:11:08 -04:00
Bojan Serafimov	c222d96bae	Clean up	2022-07-04 19:21:32 -04:00
Bojan Serafimov	b86e89d659	use template0copy, should work now	2022-07-04 16:41:52 -04:00
Bojan Serafimov	20694a9285	hardcode template0 files	2022-07-04 13:21:25 -04:00
Bojan Serafimov	5587b3a27c	Check all dbs	2022-07-04 13:09:50 -04:00
Bojan Serafimov	36d8b3b640	Add assertion	2022-07-04 12:01:53 -04:00
Bojan Serafimov	3581759e11	Use pg_relation_filepath	2022-07-04 11:45:22 -04:00
Bojan Serafimov	7e9bcaca54	WIP	2022-07-01 16:29:31 -04:00
Bojan Serafimov	dd5e6436b5	WIP	2022-07-01 15:45:51 -04:00
Bojan Serafimov	a1c6dabd35	WIP	2022-07-01 14:45:47 -04:00
Kirill Bulatov	1d0706cf25	Fix walreceiver connection selection mechanism * Avoid reconnecting to safekeeper immediately after its failure by limiting candidates to those with fewest connection attempts. Thus we don't have to wait lagging_wal_timeout (10s by default) before switch happens even if no new changes are generated, and current test_restarts_under_load expects some commits to happen within 4s. * Make default max_lsn_wal_lag larger, otherwise we constant reconnections happen during normal work. * Fix wal_connection_attempts maintanance, preventing busy loop of reconnections.	2022-06-30 00:40:12 +03:00
Dmitry Ivanov	5ee19b0758	Fix bloated coverage uploads (#2005 ) Move coverage data to a better directory, merge it better and don't publish it from CircleCI pipeline	2022-06-29 17:59:19 +03:00
Kirill Bulatov	cef90d9220	Disable cachepot for GH Actions builds (#2007 )	2022-06-29 17:56:02 +03:00