diff --git a/.github/ansible/production.hosts b/.github/ansible/production.hosts index 6a3a7791ad..d22ce0e37e 100644 --- a/.github/ansible/production.hosts +++ b/.github/ansible/production.hosts @@ -1,6 +1,7 @@ [pageservers] #zenith-1-ps-1 console_region_id=1 zenith-1-ps-2 console_region_id=1 +zenith-1-ps-3 console_region_id=1 [safekeepers] zenith-1-sk-1 console_region_id=1 diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 857e9e3533..3a12d19428 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -11,8 +11,9 @@ defaults: shell: bash -ex {0} concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true + # Allow only one workflow per any non-`main` branch. + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + cancel-in-progress: true env: RUST_BACKTRACE: 1 @@ -441,14 +442,14 @@ jobs: fi id: legacy-build-tag - - name: Build compute-tools Docker image + - name: Build neon Docker image uses: docker/build-push-action@v2 with: context: . build-args: | - GIT_VERSION="${GITHUB_SHA}" - AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" - AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" + GIT_VERSION="${{github.sha}}" + AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}" + AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}" pull: true push: true tags: neondatabase/neon:${{steps.legacy-build-tag.outputs.tag}}, neondatabase/neon:${{steps.build-tag.outputs.tag}} @@ -508,8 +509,9 @@ jobs: with: context: . build-args: | - AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" - AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" + GIT_VERSION="${{github.sha}}" + AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}" + AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}" push: false file: Dockerfile.compute-tools tags: neondatabase/compute-tools:local @@ -519,8 +521,9 @@ jobs: with: context: . build-args: | - AWS_ACCESS_KEY_ID="${CACHEPOT_AWS_ACCESS_KEY_ID}" - AWS_SECRET_ACCESS_KEY="${CACHEPOT_AWS_SECRET_ACCESS_KEY}" + GIT_VERSION="${{github.sha}}" + AWS_ACCESS_KEY_ID="${{secrets.CACHEPOT_AWS_ACCESS_KEY_ID}}" + AWS_SECRET_ACCESS_KEY="${{secrets.CACHEPOT_AWS_SECRET_ACCESS_KEY}}" push: true file: Dockerfile.compute-tools tags: neondatabase/compute-tools:${{steps.legacy-build-tag.outputs.tag}} @@ -558,7 +561,11 @@ jobs: deploy: runs-on: [ self-hosted, Linux, k8s-runner ] - needs: [ docker-image, calculate-deploy-targets ] + # We need both storage **and** compute images for deploy, because control plane + # picks the compute version based on the storage version. If it notices a fresh + # storage it may bump the compute version. And if compute image failed to build + # it may break things badly. + needs: [ docker-image, docker-image-compute, calculate-deploy-targets ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' @@ -601,7 +608,9 @@ jobs: deploy-proxy: runs-on: [ self-hosted, Linux, k8s-runner ] - needs: [ docker-image, calculate-deploy-targets ] + # Compute image isn't strictly required for proxy deploy, but let's still wait for it + # to run all deploy jobs consistently. + needs: [ docker-image, docker-image-compute, calculate-deploy-targets ] if: | (github.ref_name == 'main' || github.ref_name == 'release') && github.event_name != 'workflow_dispatch' diff --git a/.github/workflows/codestyle.yml b/.github/workflows/codestyle.yml index 2b8a01e94e..345c1d5397 100644 --- a/.github/workflows/codestyle.yml +++ b/.github/workflows/codestyle.yml @@ -11,8 +11,9 @@ defaults: shell: bash -ex {0} concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true + # Allow only one workflow per any non-`main` branch. + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + cancel-in-progress: true env: RUST_BACKTRACE: 1 diff --git a/.github/workflows/pg_clients.yml b/.github/workflows/pg_clients.yml index fe4dbea8ac..4ff31ac508 100644 --- a/.github/workflows/pg_clients.yml +++ b/.github/workflows/pg_clients.yml @@ -13,8 +13,9 @@ on: workflow_dispatch: concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true + # Allow only one workflow per any non-`main` branch. + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.ref == 'refs/heads/main' && github.sha || 'anysha' }} + cancel-in-progress: true jobs: test-postgres-client-libs: diff --git a/test_runner/batch_others/test_wal_acceptor_async.py b/test_runner/batch_others/test_wal_acceptor_async.py index 4664c332fc..d74ef8840a 100644 --- a/test_runner/batch_others/test_wal_acceptor_async.py +++ b/test_runner/batch_others/test_wal_acceptor_async.py @@ -302,6 +302,8 @@ def test_compute_restarts(neon_env_builder: NeonEnvBuilder): class BackgroundCompute(object): + MAX_QUERY_GAP_SECONDS = 2 + def __init__(self, index: int, env: NeonEnv, branch: str): self.index = index self.env = env @@ -339,7 +341,7 @@ class BackgroundCompute(object): # With less sleep, there is a very big chance of not committing # anything or only 1 xact during test run. - await asyncio.sleep(2 * random.random()) + await asyncio.sleep(random.uniform(0, self.MAX_QUERY_GAP_SECONDS)) self.running = False @@ -356,20 +358,34 @@ async def run_concurrent_computes(env: NeonEnv, background_tasks = [asyncio.create_task(compute.run()) for compute in computes] await asyncio.sleep(run_seconds) + log.info("stopping all tasks but one") for compute in computes[1:]: compute.stopped = True + await asyncio.gather(*background_tasks[1:]) log.info("stopped all tasks but one") # work for some time with only one compute -- it should be able to make some xacts - await asyncio.sleep(8) + TIMEOUT_SECONDS = computes[0].MAX_QUERY_GAP_SECONDS + 3 + initial_queries_by_0 = len(computes[0].successful_queries) + log.info(f'Waiting for another query by computes[0], ' + f'it already had {initial_queries_by_0}, timeout is {TIMEOUT_SECONDS}s') + for _ in range(10 * TIMEOUT_SECONDS): + current_queries_by_0 = len(computes[0].successful_queries) - initial_queries_by_0 + if current_queries_by_0 >= 1: + log.info(f'Found {current_queries_by_0} successful queries ' + f'by computes[0], completing the test') + break + await asyncio.sleep(0.1) + else: + assert False, "Timed out while waiting for another query by computes[0]" computes[0].stopped = True - await asyncio.gather(*background_tasks) + await asyncio.gather(background_tasks[0]) result = await exec_compute_query(env, branch, 'SELECT * FROM query_log') # we should have inserted something while single compute was running - assert len(result) >= 4 - log.info(f'Executed {len(result)} queries') + log.info(f'Executed {len(result)} queries, {current_queries_by_0} of them ' + f'by computes[0] after we started stopping the others') for row in result: log.info(f'{row[0]} {row[1]} {row[2]}')