neon/.github/workflows/_build-and-test-locally.yml

name: Build and Test Locally

on:
  workflow_call:
    inputs:
      arch:
        description: 'x64 or arm64'
        required: true
        type: string
      build-tag:
        description: 'build tag'
        required: true
        type: string
      build-tools-image:
        description: 'build-tools image'
        required: true
        type: string
      build-type:
        description: 'debug or release'
        required: true
        type: string
      test-cfg:
        description: 'a json object of postgres versions and lfc states to run regression tests on'
        required: true
        type: string
      sanitizers:
        description: 'enabled or disabled'
        required: false
        default: 'disabled'
        type: string
      test-selection:
        description: 'specification of selected test(s) to run'
        required: false
        default: ''
        type: string
      test-run-count:
        description: 'number of runs to perform for selected tests'
        required: false
        default: 1
        type: number
      rerun-failed:
        description: 'rerun failed tests to ignore flaky tests'
        required: false
        default: true
        type: boolean

defaults:
  run:
    shell: bash -euxo pipefail {0}

env:
  RUST_BACKTRACE: 1
  COPT: '-Werror'

permissions:
  contents: read

jobs:
  build-neon:
    runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large')) }}
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      contents: read
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      # Raise locked memory limit for tokio-epoll-uring.
      # On 5.10 LTS kernels < 5.10.162 (and generally mainline kernels < 5.12),
      # io_uring will account the memory of the CQ and SQ as locked.
      # More details: https://github.com/neondatabase/neon/issues/6373#issuecomment-1905814391
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    env:
      BUILD_TYPE: ${{ inputs.build-type }}
      GIT_VERSION: ${{ github.event.pull_request.head.sha || github.sha }}
      BUILD_TAG: ${{ inputs.build-tag }}

    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true

      - name: Set pg 14 revision for caching
        id: pg_v14_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v14) >> $GITHUB_OUTPUT

      - name: Set pg 15 revision for caching
        id: pg_v15_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v15) >> $GITHUB_OUTPUT

      - name: Set pg 16 revision for caching
        id: pg_v16_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v16) >> $GITHUB_OUTPUT

      - name: Set pg 17 revision for caching
        id: pg_v17_rev
        run: echo pg_rev=$(git rev-parse HEAD:vendor/postgres-v17) >> $GITHUB_OUTPUT

      # Set some environment variables used by all the steps.
      #
      # CARGO_FLAGS is extra options to pass to "cargo build", "cargo test" etc.
      #   It also includes --features, if any
      #
      # CARGO_FEATURES is passed to "cargo metadata". It is separate from CARGO_FLAGS,
      #   because "cargo metadata" doesn't accept --release or --debug options
      #
      # We run tests with addtional features, that are turned off by default (e.g. in release builds), see
      # corresponding Cargo.toml files for their descriptions.
      - name: Set env variables
        env:
          ARCH: ${{ inputs.arch }}
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          CARGO_FEATURES="--features testing"
          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' ]]; then
            cov_prefix="scripts/coverage --profraw-prefix=$GITHUB_JOB --dir=/tmp/coverage run"
            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "debug" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked"
          elif [[ $BUILD_TYPE == "release" ]]; then
            cov_prefix=""
            CARGO_FLAGS="--locked --release"
          fi
          if [[ $SANITIZERS == 'enabled' ]]; then
            make_vars="WITH_SANITIZERS=yes"
          else
            make_vars=""
          fi
          {
            echo "cov_prefix=${cov_prefix}"
            echo "make_vars=${make_vars}"
            echo "CARGO_FEATURES=${CARGO_FEATURES}"
            echo "CARGO_FLAGS=${CARGO_FLAGS}"
            echo "CARGO_HOME=${GITHUB_WORKSPACE}/.cargo"
          } >> $GITHUB_ENV

      - name: Cache postgres v14 build
        id: cache_pg_14
        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
        with:
          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v14
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v14_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}

      - name: Cache postgres v15 build
        id: cache_pg_15
        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
        with:
          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v15
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v15_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}

      - name: Cache postgres v16 build
        id: cache_pg_16
        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
        with:
          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v16
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v16_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}

      - name: Cache postgres v17 build
        id: cache_pg_17
        uses: tespkg/actions-cache@b7bf5fcc2f98a52ac6080eb0fd282c2f752074b1  # v1.8.0
        with:
          endpoint: ${{ vars.HETZNER_CACHE_REGION }}.${{ vars.HETZNER_CACHE_ENDPOINT }}
          bucket: ${{ vars.HETZNER_CACHE_BUCKET }}
          accessKey: ${{ secrets.HETZNER_CACHE_ACCESS_KEY }}
          secretKey: ${{ secrets.HETZNER_CACHE_SECRET_KEY }}
          use-fallback: false
          path: pg_install/v17
          key: v1-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-pg-${{ steps.pg_v17_rev.outputs.pg_rev }}-bookworm-${{ hashFiles('Makefile', 'build-tools.Dockerfile') }}

      - name: Build postgres v14
        if: steps.cache_pg_14.outputs.cache-hit != 'true'
        run: mold -run make ${make_vars} postgres-v14 -j$(nproc)

      - name: Build postgres v15
        if: steps.cache_pg_15.outputs.cache-hit != 'true'
        run: mold -run make ${make_vars} postgres-v15 -j$(nproc)

      - name: Build postgres v16
        if: steps.cache_pg_16.outputs.cache-hit != 'true'
        run: mold -run make ${make_vars} postgres-v16 -j$(nproc)

      - name: Build postgres v17
        if: steps.cache_pg_17.outputs.cache-hit != 'true'
        run: mold -run make ${make_vars} postgres-v17 -j$(nproc)

      - name: Build neon extensions
        run: mold -run make ${make_vars} neon-pg-ext -j$(nproc)

      - name: Build walproposer-lib
        run: mold -run make ${make_vars} walproposer-lib -j$(nproc)

      - name: Run cargo build
        env:
          WITH_TESTS: ${{ inputs.sanitizers != 'enabled' && '--tests' || '' }}
        run: |
          export ASAN_OPTIONS=detect_leaks=0
          ${cov_prefix} mold -run cargo build $CARGO_FLAGS $CARGO_FEATURES --bins ${WITH_TESTS}

      # Do install *before* running rust tests because they might recompile the
      # binaries with different features/flags.
      - name: Install rust binaries
        env:
          ARCH: ${{ inputs.arch }}
          SANITIZERS: ${{ inputs.sanitizers }}
        run: |
          # Install target binaries
          mkdir -p /tmp/neon/bin/
          binaries=$(
            ${cov_prefix} cargo metadata $CARGO_FEATURES --format-version=1 --no-deps |
            jq -r '.packages[].targets[] | select(.kind | index("bin")) | .name'
          )
          for bin in $binaries; do
            SRC=target/$BUILD_TYPE/$bin
            DST=/tmp/neon/bin/$bin
            cp "$SRC" "$DST"
          done

          # Install test executables and write list of all binaries (for code coverage)
          if [[ $BUILD_TYPE == "debug" && $ARCH == 'x64' && $SANITIZERS != 'enabled' ]]; then
            # Keep bloated coverage data files away from the rest of the artifact
            mkdir -p /tmp/coverage/

            mkdir -p /tmp/neon/test_bin/

            test_exe_paths=$(
              ${cov_prefix} cargo test $CARGO_FLAGS $CARGO_FEATURES --message-format=json --no-run |
              jq -r '.executable | select(. != null)'
            )
            for bin in $test_exe_paths; do
              SRC=$bin
              DST=/tmp/neon/test_bin/$(basename $bin)

              # We don't need debug symbols for code coverage, so strip them out to make
              # the artifact smaller.
              strip "$SRC" -o "$DST"
              echo "$DST" >> /tmp/coverage/binaries.list
            done

            for bin in $binaries; do
              echo "/tmp/neon/bin/$bin" >> /tmp/coverage/binaries.list
            done
          fi

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502 # v4.0.2
        with:
          aws-region: eu-central-1
          role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          role-duration-seconds: 18000 # 5 hours

      - name: Run rust tests
        if: ${{ inputs.sanitizers != 'enabled' }}
        env:
          NEXTEST_RETRIES: 3
        run: |
          LD_LIBRARY_PATH=$(pwd)/pg_install/v17/lib
          export LD_LIBRARY_PATH

          #nextest does not yet support running doctests
          ${cov_prefix} cargo test --doc $CARGO_FLAGS $CARGO_FEATURES

          # run all non-pageserver tests
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E '!package(pageserver)'

          # run pageserver tests
          # (When developing new pageserver features gated by config fields, we commonly make the rust
          # unit tests sensitive to an environment variable NEON_PAGESERVER_UNIT_TEST_FEATURENAME.
          # Then run the nextest invocation below for all relevant combinations. Singling out the
          # pageserver tests from non-pageserver tests cuts down the time it takes for this CI step.)
          NEON_PAGESERVER_UNIT_TEST_VIRTUAL_FILE_IOENGINE=tokio-epoll-uring  \
          ${cov_prefix} \
          cargo nextest run $CARGO_FLAGS $CARGO_FEATURES  -E 'package(pageserver)'

          # Run separate tests for real S3
          export ENABLE_REAL_S3_REMOTE_STORAGE=nonempty
          export REMOTE_STORAGE_S3_BUCKET=neon-github-ci-tests
          export REMOTE_STORAGE_S3_REGION=eu-central-1
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_s3)'

          # Run separate tests for real Azure Blob Storage
          # XXX: replace region with `eu-central-1`-like region
          export ENABLE_REAL_AZURE_REMOTE_STORAGE=y
          export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}"
          export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}"
          export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}"
          export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}"
          ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)'

      - name: Install postgres binaries
        run: |
          # Use tar to copy files matching the pattern, preserving the paths in the destionation
          tar c \
            pg_install/v* \
            pg_install/build/*/src/test/regress/*.so \
            pg_install/build/*/src/test/regress/pg_regress \
            pg_install/build/*/src/test/isolation/isolationtester \
            pg_install/build/*/src/test/isolation/pg_isolation_regress \
            | tar  x -C /tmp/neon

      - name: Upload Neon artifact
        uses: ./.github/actions/upload
        with:
          name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact
          path: /tmp/neon
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}

      - name: Check diesel schema
        if: inputs.build-type == 'release' && inputs.arch == 'x64'
        env:
          DATABASE_URL: postgresql://localhost:1235/storage_controller
          POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install
        run: |
          export ASAN_OPTIONS=detect_leaks=0
          /tmp/neon/bin/neon_local init
          /tmp/neon/bin/neon_local storage_controller start

          diesel print-schema > storage_controller/src/schema.rs

          if [ -n "$(git diff storage_controller/src/schema.rs)" ]; then
            echo >&2 "Uncommitted changes in diesel schema"

            git diff .
            exit 1
          fi

          /tmp/neon/bin/neon_local storage_controller stop

      # XXX: keep this after the binaries.list is formed, so the coverage can properly work later
      - name: Merge and upload coverage data
        if: inputs.build-type == 'debug'
        uses: ./.github/actions/save-coverage-data

  regress-tests:
    # Don't run regression tests on debug arm64 builds
    if: inputs.build-type != 'debug' || inputs.arch != 'arm64'
    permissions:
      id-token: write # aws-actions/configure-aws-credentials
      contents: read
      statuses: write
    needs: [ build-neon ]
    runs-on: ${{ fromJSON(format('["self-hosted", "{0}"]', inputs.arch == 'arm64' && 'large-arm64' || 'large-metal')) }}
    container:
      image: ${{ inputs.build-tools-image }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}
      # for changed limits, see comments on `options:` earlier in this file
      options: --init --shm-size=512mb --ulimit memlock=67108864:67108864
    strategy:
      fail-fast: false
      matrix: ${{ fromJSON(format('{{"include":{0}}}', inputs.test-cfg)) }}
    steps:
      - name: Harden the runner (Audit all outbound calls)
        uses: step-security/harden-runner@4d991eb9b905ef189e4c376166672c3f2f230481 # v2.11.0
        with:
          egress-policy: audit

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          submodules: true

      - name: Pytest regression tests
        continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }}
        uses: ./.github/actions/run-python-test-set
        timeout-minutes: ${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 75 || 180 }}
        with:
          build_type: ${{ inputs.build-type }}
          test_selection: regress
          needs_postgres_source: true
          run_with_real_s3: true
          real_s3_bucket: neon-github-ci-tests
          real_s3_region: eu-central-1
          rerun_failed: ${{ inputs.rerun-failed }}
          pg_version: ${{ matrix.pg_version }}
          sanitizers: ${{ inputs.sanitizers }}
          aws-oidc-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }}
          # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds.
          # Attempt to stop tests gracefully to generate test reports
          # until they are forcibly stopped by the stricter `timeout-minutes` limit.
          extra_params: --session-timeout=${{ (inputs.build-type == 'release' && inputs.sanitizers != 'enabled') && 3000 || 10200 }} --count=${{ inputs.test-run-count }}
                        ${{ inputs.test-selection != '' && format('-k "{0}"', inputs.test-selection) || '' }}
        env:
          TEST_RESULT_CONNSTR: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }}
          CHECK_ONDISK_DATA_COMPATIBILITY: nonempty
          BUILD_TAG: ${{ inputs.build-tag }}
          PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring
          USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }}

      # Temporary disable this step until we figure out why it's so flaky
      # Ref https://github.com/neondatabase/neon/issues/4540
      - name: Merge and upload coverage data
        if: |
          false &&
          inputs.build-type == 'debug' && matrix.pg_version == 'v16'
        uses: ./.github/actions/save-coverage-data