From 1f0dea9a1a6b7f5f575f3f64ac0725f85e9535f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Mon, 24 Feb 2025 18:45:23 +0100 Subject: [PATCH 001/207] feat(ci): push container images to ghcr.io as well (#10945) ## Problem There's new rate-limits coming on docker hub. To reduce our reliance on docker hub and the problems the limits are going to cause for us, we want to prepare for this by also pushing our container images to ghcr.io ## Summary of changes Push our images to ghcr.io as well and not just docker hub. --- .../workflows/_push-to-container-registry.yml | 35 +++++++------- .github/workflows/build_and_test.yml | 48 ++++++++++--------- .github/workflows/pin-build-tools-image.yml | 12 +++-- scripts/generate_image_maps.py | 1 + 4 files changed, 52 insertions(+), 44 deletions(-) diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml index c938f62ad5..403d078988 100644 --- a/.github/workflows/_push-to-container-registry.yml +++ b/.github/workflows/_push-to-container-registry.yml @@ -11,8 +11,12 @@ on: description: AWS region to log in to. Required when pushing to ECR. required: false type: string - aws-account-ids: - description: Comma separated AWS account IDs to log in to for pushing to ECR. Required when pushing to ECR. + aws-account-id: + description: AWS account ID to log in to for pushing to ECR. Required when pushing to ECR. + required: false + type: string + aws-role-to-assume: + description: AWS role to assume to for pushing to ECR. Required when pushing to ECR. required: false type: string azure-client-id: @@ -31,16 +35,6 @@ on: description: ACR registry name. Required when pushing to ACR. required: false type: string - secrets: - docker-hub-username: - description: Docker Hub username. Required when pushing to Docker Hub. - required: false - docker-hub-password: - description: Docker Hub password. Required when pushing to Docker Hub. 
- required: false - aws-role-to-assume: - description: AWS role to assume. Required when pushing to ECR. - required: false permissions: {} @@ -53,6 +47,7 @@ jobs: runs-on: ubuntu-22.04 permissions: id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR steps: - uses: actions/checkout@v4 with: @@ -67,14 +62,14 @@ jobs: uses: aws-actions/configure-aws-credentials@v4 with: aws-region: "${{ inputs.aws-region }}" - role-to-assume: "${{ secrets.aws-role-to-assume }}" + role-to-assume: "arn:aws:iam::${{ inputs.aws-account-id }}:role/${{ inputs.aws-role-to-assume }}" role-duration-seconds: 3600 - name: Login to ECR if: contains(inputs.image-map, 'amazonaws.com/') uses: aws-actions/amazon-ecr-login@v2 with: - registries: "${{ inputs.aws-account-ids }}" + registries: "${{ inputs.aws-account-id }}" - name: Configure Azure credentials if: contains(inputs.image-map, 'azurecr.io/') @@ -89,11 +84,19 @@ jobs: run: | az acr login --name=${{ inputs.acr-registry-name }} + - name: Login to GHCR + if: contains(inputs.image-map, 'ghcr.io/') + uses: docker/login-action@v3 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Log in to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.docker-hub-username }} - password: ${{ secrets.docker-hub-password }} + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - name: Copy docker images to target registries run: python scripts/push_with_image_map.py diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 8f3392ceea..1b706b3f16 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -866,68 +866,72 @@ jobs: push-neon-image-dev: needs: [ generate-image-maps, neon-image ] uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + 
packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.neon-dev }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-compute-image-dev: needs: [ generate-image-maps, vm-compute-node-image ] uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.compute-dev }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-neon-image-prod: if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' needs: [ generate-image-maps, neon-image, test-images ] uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + packages: 
write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.neon-prod }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit push-compute-image-prod: if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' needs: [ generate-image-maps, vm-compute-node-image, test-images ] uses: ./.github/workflows/_push-to-container-registry.yml + permissions: + id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR with: image-map: '${{ needs.generate-image-maps.outputs.compute-prod }}' aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_PROD_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_PROD_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_PROD_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ secrets.PROD_GHA_OIDC_ROLE }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit # This is a bit of a special case so we're not using a generated image map. 
add-latest-tag-to-neon-extensions-test-image: @@ -940,9 +944,7 @@ jobs: "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] } - secrets: - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit trigger-custom-extensions-build-and-wait: needs: [ check-permissions, tag ] diff --git a/.github/workflows/pin-build-tools-image.yml b/.github/workflows/pin-build-tools-image.yml index b305b662ee..d2588ba0bf 100644 --- a/.github/workflows/pin-build-tools-image.yml +++ b/.github/workflows/pin-build-tools-image.yml @@ -65,6 +65,7 @@ jobs: permissions: id-token: write # Required for aws/azure login + packages: write # required for pushing to GHCR uses: ./.github/workflows/_push-to-container-registry.yml with: @@ -72,12 +73,15 @@ jobs: { "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bullseye": [ "docker.io/neondatabase/build-tools:pinned-bullseye", + "ghcr.io/neondatabase/build-tools:pinned-bullseye", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bullseye", "${{ vars.AZURE_DEV_REGISTRY_NAME }}.azurecr.io/neondatabase/build-tools:pinned-bullseye" ], "docker.io/neondatabase/build-tools:${{ inputs.from-tag }}-bookworm": [ "docker.io/neondatabase/build-tools:pinned-bookworm", "docker.io/neondatabase/build-tools:pinned", + "ghcr.io/neondatabase/build-tools:pinned-bookworm", + "ghcr.io/neondatabase/build-tools:pinned", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned-bookworm", "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/build-tools:pinned", "${{ vars.AZURE_DEV_REGISTRY_NAME 
}}.azurecr.io/neondatabase/build-tools:pinned-bookworm", @@ -85,12 +89,10 @@ jobs: ] } aws-region: ${{ vars.AWS_ECR_REGION }} - aws-account-ids: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-account-id: "${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}" + aws-role-to-assume: "gha-oidc-neon-admin" azure-client-id: ${{ vars.AZURE_DEV_CLIENT_ID }} azure-subscription-id: ${{ vars.AZURE_DEV_SUBSCRIPTION_ID }} azure-tenant-id: ${{ vars.AZURE_TENANT_ID }} acr-registry-name: ${{ vars.AZURE_DEV_REGISTRY_NAME }} - secrets: - aws-role-to-assume: "${{ vars.DEV_AWS_OIDC_ROLE_ARN }}" - docker-hub-username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} - docker-hub-password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + secrets: inherit diff --git a/scripts/generate_image_maps.py b/scripts/generate_image_maps.py index 915eb33673..39ece5b38f 100644 --- a/scripts/generate_image_maps.py +++ b/scripts/generate_image_maps.py @@ -27,6 +27,7 @@ components = { registries = { "dev": [ "docker.io/neondatabase", + "ghcr.io/neondatabase", f"{dev_aws}.dkr.ecr.{aws_region}.amazonaws.com", f"{dev_acr}.azurecr.io/neondatabase", ], From 8fd0f89b9499f38c8abf634ce068c235ef0305eb Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Mon, 24 Feb 2025 18:50:49 +0100 Subject: [PATCH 002/207] rename libduckdb.so in pg_duckdb context to avoid conflict with pg_mooncake (#10915) ## Problem Introducing pg_duckdb caused a conflict with pg_mooncake. Both use libduckdb.so in different versions. ## Summary of changes - Rename the libduckdb.so to libduckdb_pg_duckdb.so in the context of pg_duckdb so that it doesn't conflict with libduckdb.so referenced by pg_mooncake. 
- use a version map to rename the duckdb symbols to a version specific name - DUCKDB_1.1.3 for pg_mooncake - DUCKDB_1.2.0 for pg_duckdb For the concept of version maps see - https://www.man7.org/conf/lca2006/shared_libraries/slide19a.html - https://peeterjoot.com/2019/09/20/an-example-of-linux-glibc-symbol-versioning/ - https://akkadia.org/drepper/dsohowto.pdf --- compute/compute-node.Dockerfile | 13 +++--- compute/patches/duckdb_v113.patch | 25 +++++++++++ compute/patches/duckdb_v120.patch | 67 ++++++++++++++++++++++++++++ compute/patches/pg_duckdb_v031.patch | 22 +++++++++ 4 files changed, 121 insertions(+), 6 deletions(-) create mode 100644 compute/patches/duckdb_v113.patch create mode 100644 compute/patches/duckdb_v120.patch diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index ef4c22612d..a74291fdb4 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1458,9 +1458,11 @@ RUN make -j $(getconf _NPROCESSORS_ONLN) && \ FROM build-deps AS pg_mooncake-src ARG PG_VERSION WORKDIR /ext-src +COPY compute/patches/duckdb_v113.patch . RUN wget https://github.com/Mooncake-Labs/pg_mooncake/releases/download/v0.1.2/pg_mooncake-0.1.2.tar.gz -O pg_mooncake.tar.gz && \ echo "4550473784fcdd2e1e18062bc01eb9c286abd27cdf5e11a4399be6c0a426ba90 pg_mooncake.tar.gz" | sha256sum --check && \ mkdir pg_mooncake-src && cd pg_mooncake-src && tar xzf ../pg_mooncake.tar.gz --strip-components=1 -C . && \ + cd third_party/duckdb && patch -p1 < /ext-src/duckdb_v113.patch && cd ../.. && \ echo "make -f pg_mooncake-src/Makefile.build installcheck TEST_DIR=./test SQL_DIR=./sql SRC_DIR=./src" > neon-test.sh && \ chmod a+x neon-test.sh @@ -1480,6 +1482,7 @@ RUN make release -j $(getconf _NPROCESSORS_ONLN) && \ FROM build-deps AS pg_duckdb-src WORKDIR /ext-src COPY compute/patches/pg_duckdb_v031.patch . +COPY compute/patches/duckdb_v120.patch . 
# pg_duckdb build requires source dir to be a git repo to get submodules # allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: # - extension management function duckdb.install_extension() @@ -1487,7 +1490,9 @@ COPY compute/patches/pg_duckdb_v031.patch . RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \ cd pg_duckdb-src && \ git submodule update --init --recursive && \ - patch -p1 < /ext-src/pg_duckdb_v031.patch + patch -p1 < /ext-src/pg_duckdb_v031.patch && \ + cd third_party/duckdb && \ + patch -p1 < /ext-src/duckdb_v120.patch FROM pg-build AS pg_duckdb-build ARG PG_VERSION @@ -1676,11 +1681,7 @@ COPY --from=pg_anon-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg_ivm-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg_partman-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg_mooncake-build /usr/local/pgsql/ /usr/local/pgsql/ - -# Disabled temporarily, because it clashed with pg_mooncake. pg_mooncake -# also depends on libduckdb, but a different version. 
-#COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/ - +COPY --from=pg_duckdb-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pg_repack-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pgaudit-build /usr/local/pgsql/ /usr/local/pgsql/ COPY --from=pgauditlogtofile-build /usr/local/pgsql/ /usr/local/pgsql/ diff --git a/compute/patches/duckdb_v113.patch b/compute/patches/duckdb_v113.patch new file mode 100644 index 0000000000..b7b43b88bf --- /dev/null +++ b/compute/patches/duckdb_v113.patch @@ -0,0 +1,25 @@ +diff --git a/libduckdb.map b/libduckdb.map +new file mode 100644 +index 0000000000..3b56f00cd7 +--- /dev/null ++++ b/libduckdb.map +@@ -0,0 +1,6 @@ ++DUCKDB_1.1.3 { ++ global: ++ *duckdb*; ++ local: ++ *; ++}; +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 3e757a4bcc..88ab4005b9 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -135,6 +135,8 @@ else() + target_link_libraries(duckdb ${DUCKDB_LINK_LIBS}) + link_threads(duckdb) + link_extension_libraries(duckdb) ++ target_link_options(duckdb PRIVATE ++ -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb.map) + + add_library(duckdb_static STATIC ${ALL_OBJECT_FILES}) + target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS}) diff --git a/compute/patches/duckdb_v120.patch b/compute/patches/duckdb_v120.patch new file mode 100644 index 0000000000..cf317736a5 --- /dev/null +++ b/compute/patches/duckdb_v120.patch @@ -0,0 +1,67 @@ +diff --git a/libduckdb_pg_duckdb.map b/libduckdb_pg_duckdb.map +new file mode 100644 +index 0000000000..0872978b48 +--- /dev/null ++++ b/libduckdb_pg_duckdb.map +@@ -0,0 +1,6 @@ ++DUCKDB_1.2.0 { ++ global: ++ *duckdb*; ++ local: ++ *; ++}; +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index 58adef3fc0..2c522f91be 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -59,7 +59,7 @@ endfunction() + + if(AMALGAMATION_BUILD) + +- add_library(duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp") ++ 
add_library(duckdb_pg_duckdb SHARED "${PROJECT_SOURCE_DIR}/src/amalgamation/duckdb.cpp") + target_link_libraries(duckdb ${DUCKDB_SYSTEM_LIBS}) + link_threads(duckdb) + link_extension_libraries(duckdb) +@@ -109,7 +109,7 @@ else() + duckdb_yyjson + duckdb_zstd) + +- add_library(duckdb SHARED ${ALL_OBJECT_FILES}) ++ add_library(duckdb_pg_duckdb SHARED ${ALL_OBJECT_FILES}) + + if(WIN32 AND NOT MINGW) + ensure_variable_is_number(DUCKDB_MAJOR_VERSION RC_MAJOR_VERSION) +@@ -131,9 +131,11 @@ else() + target_sources(duckdb PRIVATE version.rc) + endif() + +- target_link_libraries(duckdb ${DUCKDB_LINK_LIBS}) +- link_threads(duckdb) +- link_extension_libraries(duckdb) ++ target_link_libraries(duckdb_pg_duckdb ${DUCKDB_LINK_LIBS}) ++ link_threads(duckdb_pg_duckdb) ++ link_extension_libraries(duckdb_pg_duckdb) ++ target_link_options(duckdb_pg_duckdb PRIVATE ++ -Wl,--version-script=${CMAKE_SOURCE_DIR}/libduckdb_pg_duckdb.map) + + add_library(duckdb_static STATIC ${ALL_OBJECT_FILES}) + target_link_libraries(duckdb_static ${DUCKDB_LINK_LIBS}) +@@ -141,7 +143,7 @@ else() + link_extension_libraries(duckdb_static) + + target_include_directories( +- duckdb PUBLIC $ ++ duckdb_pg_duckdb PUBLIC $ + $) + + target_include_directories( +@@ -161,7 +163,7 @@ else() + endif() + + install( +- TARGETS duckdb duckdb_static ++ TARGETS duckdb_pg_duckdb duckdb_static + EXPORT "${DUCKDB_EXPORT_SET}" + LIBRARY DESTINATION "${INSTALL_LIB_DIR}" + ARCHIVE DESTINATION "${INSTALL_LIB_DIR}" diff --git a/compute/patches/pg_duckdb_v031.patch b/compute/patches/pg_duckdb_v031.patch index a7e188d69e..edc7fbf69d 100644 --- a/compute/patches/pg_duckdb_v031.patch +++ b/compute/patches/pg_duckdb_v031.patch @@ -1,3 +1,25 @@ +diff --git a/Makefile b/Makefile +index 3235cc8..6b892bc 100644 +--- a/Makefile ++++ b/Makefile +@@ -32,7 +32,7 @@ else + DUCKDB_BUILD_TYPE = release + endif + +-DUCKDB_LIB = libduckdb$(DLSUFFIX) ++DUCKDB_LIB = libduckdb_pg_duckdb$(DLSUFFIX) + FULL_DUCKDB_LIB = 
third_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src/$(DUCKDB_LIB) + + ERROR_ON_WARNING ?= +@@ -54,7 +54,7 @@ override PG_CXXFLAGS += -std=c++17 ${DUCKDB_BUILD_CXX_FLAGS} ${COMPILER_FLAGS} - + # changes to the vendored code in one place. + override PG_CFLAGS += -Wno-declaration-after-statement + +-SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb -lstdc++ -llz4 ++SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -Lthird_party/duckdb/build/$(DUCKDB_BUILD_TYPE)/src -L$(PG_LIB) -lduckdb_pg_duckdb -lstdc++ -llz4 + + include Makefile.global + diff --git a/sql/pg_duckdb--0.2.0--0.3.0.sql b/sql/pg_duckdb--0.2.0--0.3.0.sql index d777d76..af60106 100644 --- a/sql/pg_duckdb--0.2.0--0.3.0.sql From 565a9e62a1e865a0de1966b1afa146b958d39396 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 24 Feb 2025 22:16:37 +0200 Subject: [PATCH 003/207] compute: Disconnect if no response to a pageserver request is received (#10882) We've seen some cases in production where a compute doesn't get a response to a pageserver request for several minutes, or even more. We haven't found the root cause for that yet, but whatever the reason is, it seems overly optimistic to think that if the pageserver hasn't responded for 2 minutes, we'd get a response if we just wait patiently a little longer. More likely, the pageserver is dead or there's some kind of a network glitch so that the TCP connection is dead, or at least stuck for a long time. Either way, it's better to disconnect and reconnect. I set the default timeout to 2 minutes, which should be enough for any GetPage request under normal circumstances, even if the pageserver has to download several layer files from remote storage. Make the disconnect timeout configurable. Also make the "log interval", after which we print a message to the log configurable, so that if you change the disconnect timeout, you can set the log timeout correspondingly. The default log interval is still 10 s. 
The new GUCs are called "neon.pageserver_response_log_timeout" and "neon.pageserver_response_disconnect_timeout". Includes a basic test for the log and disconnect timeouts. Implements issue #10857 --- pageserver/src/page_service.rs | 3 + pgxn/neon/libpagestore.c | 215 +++++++++++++++------ test_runner/regress/test_bad_connection.py | 191 ++++++++++++++++++ 3 files changed, 347 insertions(+), 62 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index b9b8e32753..cab3d76bf8 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -45,6 +45,7 @@ use utils::sync::gate::{Gate, GateGuard}; use utils::sync::spsc_fold; use utils::{ auth::{Claims, Scope, SwappableJwtAuth}, + failpoint_support, id::{TenantId, TimelineId}, lsn::Lsn, simple_rcu::RcuReadGuard, @@ -1298,6 +1299,8 @@ impl PageServerHandler { &response_msg.serialize(protocol_version), ))?; + failpoint_support::sleep_millis_async!("before-pagestream-msg-flush", cancel); + // what we want to do let socket_fd = pgb_writer.socket_fd; let flush_fut = pgb_writer.flush(); diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index f5801b379b..f71f11ff93 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -14,6 +14,8 @@ */ #include "postgres.h" +#include + #include "access/xlog.h" #include "common/hashfn.h" #include "fmgr.h" @@ -61,6 +63,9 @@ int neon_protocol_version = 2; static int max_reconnect_attempts = 60; static int stripe_size; +static int pageserver_response_log_timeout = 10000; +static int pageserver_response_disconnect_timeout = 120000; /* 2 minutes */ + typedef struct { char connstring[MAX_SHARDS][MAX_PAGESERVER_CONNSTRING_SIZE]; @@ -129,6 +134,11 @@ typedef struct uint64 nrequests_sent; uint64 nresponses_received; + /* State for the receive timeout mechanism in call_PQgetCopyData() */ + instr_time receive_start_time; /* when we started waiting */ + instr_time receive_last_log_time; /* when we last printed a log 
message for the wait */ + bool receive_logged; /* has the wait been logged */ + /*--- * WaitEventSet containing: * - WL_SOCKET_READABLE on 'conn' @@ -661,6 +671,9 @@ pageserver_connect(shardno_t shard_no, int elevel) shard->state = PS_Connected; shard->nrequests_sent = 0; shard->nresponses_received = 0; + INSTR_TIME_SET_ZERO(shard->receive_start_time); + INSTR_TIME_SET_ZERO(shard->receive_last_log_time); + shard->receive_logged = false; } /* FALLTHROUGH */ case PS_Connected: @@ -680,6 +693,33 @@ pageserver_connect(shardno_t shard_no, int elevel) Assert(false); } +static void +get_socket_stats(int socketfd, int *sndbuf, int *recvbuf) +{ + *sndbuf = -1; + *recvbuf = -1; + +#ifdef __linux__ + /* + * get kernel's send and recv queue size via ioctl + * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27 + */ + if (socketfd != -1) + { + int ioctl_err; + + ioctl_err = ioctl(socketfd, SIOCOUTQ, sndbuf); + if (ioctl_err!= 0) { + *sndbuf = -errno; + } + ioctl_err = ioctl(socketfd, FIONREAD, recvbuf); + if (ioctl_err != 0) { + *recvbuf = -errno; + } + } +#endif +} + /* * A wrapper around PQgetCopyData that checks for interrupts while sleeping. */ @@ -690,26 +730,8 @@ call_PQgetCopyData(shardno_t shard_no, char **buffer) PageServer *shard = &page_servers[shard_no]; PGconn *pageserver_conn = shard->conn; instr_time now, - start_ts, since_start, - last_log_ts, since_last_log; - bool logged = false; - - /* - * As a debugging aid, if we don't get a response for a long time, print a - * log message. - * - * 10 s is a very generous threshold, normally we expect a response in a - * few milliseconds. We have metrics to track latencies in normal ranges, - * but in the cases that take exceptionally long, it's useful to log the - * exact timestamps. 
- */ -#define LOG_INTERVAL_MS INT64CONST(10 * 1000) - - INSTR_TIME_SET_CURRENT(now); - start_ts = last_log_ts = now; - INSTR_TIME_SET_ZERO(since_last_log); retry: ret = PQgetCopyData(pageserver_conn, buffer, 1 /* async */ ); @@ -718,11 +740,36 @@ retry: { WaitEvent occurred_event; int noccurred; + double log_timeout, + disconnect_timeout; long timeout; - timeout = Max(0, LOG_INTERVAL_MS - INSTR_TIME_GET_MILLISEC(since_last_log)); + /* + * Calculate time elapsed since the start, and since the last progress + * log message. On first call, remember the start time. + */ + INSTR_TIME_SET_CURRENT(now); + if (INSTR_TIME_IS_ZERO(shard->receive_start_time)) + { + shard->receive_start_time = now; + INSTR_TIME_SET_ZERO(since_start); + shard->receive_last_log_time = now; + INSTR_TIME_SET_ZERO(since_last_log); + shard->receive_logged = false; + } + else + { + since_start = now; + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + since_last_log = now; + INSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time); + } + + /* Sleep until the log or disconnect timeout is reached. 
*/ + log_timeout = Max(0, (double) pageserver_response_log_timeout - INSTR_TIME_GET_MILLISEC(since_last_log)); + disconnect_timeout = Max(0, (double) pageserver_response_disconnect_timeout - INSTR_TIME_GET_MILLISEC(since_start)); + timeout = (long) ceil(Min(log_timeout, disconnect_timeout)); - /* Sleep until there's something to do */ noccurred = WaitEventSetWait(shard->wes_read, timeout, &occurred_event, 1, WAIT_EVENT_NEON_PS_READ); ResetLatch(MyLatch); @@ -740,49 +787,61 @@ retry: pfree(msg); return -1; } + goto retry; + } + + /* Timeout was reached, or we were interrupted for some other reason */ + INSTR_TIME_SET_CURRENT(now); + since_last_log = now; + INSTR_TIME_SUBTRACT(since_last_log, shard->receive_last_log_time); + since_start = now; + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + + /* + * As a debugging aid, if we don't get a response to a pageserver request + * for a long time, print a log message. + * + * The default neon.pageserver_response_log_timeout value, 10 s, is + * very generous. Normally we expect a response in a few + * milliseconds. We have metrics to track latencies in normal ranges, + * but in the cases that take exceptionally long, it's useful to log + * the exact timestamps. + */ + if (INSTR_TIME_GET_MILLISEC(since_last_log) >= pageserver_response_log_timeout) + { + int sndbuf; + int recvbuf; + + get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf); + + neon_shard_log(shard_no, LOG, + "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", + INSTR_TIME_GET_DOUBLE(since_start), + shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); + shard->receive_last_log_time = now; + shard->receive_logged = true; } /* - * Print a message to the log if a long time has passed with no - * response. + * If an even longer time has passed without receiving a response from + * the pageserver, disconnect. 
That triggers a reconnection attempt + * in the caller. + * + * If this happens, the pageserver is likely dead and isn't coming + * back, or there's some kind of a network glitch and the connection + * is permanently gone. Without this, if the pageserver or the network + * connection is dead, it could take a very long time (15 minutes or + * more) until the TCP keepalive timeout notices that. Even if we + * would in fact get a response if we just waited a little longer, + * there's a good chance that we'll get the response sooner by + * reconnecting. */ - INSTR_TIME_SET_CURRENT(now); - since_last_log = now; - INSTR_TIME_SUBTRACT(since_last_log, last_log_ts); - if (INSTR_TIME_GET_MILLISEC(since_last_log) >= LOG_INTERVAL_MS) + if (INSTR_TIME_GET_MILLISEC(since_start) >= pageserver_response_disconnect_timeout) { - int sndbuf = -1; - int recvbuf = -1; -#ifdef __linux__ - int socketfd; -#endif - - since_start = now; - INSTR_TIME_SUBTRACT(since_start, start_ts); - -#ifdef __linux__ - /* - * get kernel's send and recv queue size via ioctl - * https://elixir.bootlin.com/linux/v6.1.128/source/include/uapi/linux/sockios.h#L25-L27 - */ - socketfd = PQsocket(pageserver_conn); - if (socketfd != -1) { - int ioctl_err; - ioctl_err = ioctl(socketfd, SIOCOUTQ, &sndbuf); - if (ioctl_err!= 0) { - sndbuf = -errno; - } - ioctl_err = ioctl(socketfd, FIONREAD, &recvbuf); - if (ioctl_err != 0) { - recvbuf = -errno; - } - } -#endif - neon_shard_log(shard_no, LOG, "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", - INSTR_TIME_GET_DOUBLE(since_start), - shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); - last_log_ts = now; - logged = true; + neon_shard_log(shard_no, LOG, "no response from pageserver for %0.3f s, disconnecting", + INSTR_TIME_GET_DOUBLE(since_start)); + pageserver_disconnect(shard_no); + return -1; } goto retry; @@ -792,14 +851,18 @@ retry: 
* If we logged earlier that the response is taking a long time, log * another message when the response is finally received. */ - if (logged) + if (shard->receive_logged) { INSTR_TIME_SET_CURRENT(now); since_start = now; - INSTR_TIME_SUBTRACT(since_start, start_ts); - neon_shard_log(shard_no, LOG, "received response from pageserver after %0.3f s", + INSTR_TIME_SUBTRACT(since_start, shard->receive_start_time); + neon_shard_log(shard_no, LOG, + "received response from pageserver after %0.3f s", INSTR_TIME_GET_DOUBLE(since_start)); } + INSTR_TIME_SET_ZERO(shard->receive_start_time); + INSTR_TIME_SET_ZERO(shard->receive_last_log_time); + shard->receive_logged = false; return ret; } @@ -973,9 +1036,17 @@ pageserver_receive(shardno_t shard_no) pfree(msg); } } + else if (rc == -1 && shard->state == PS_Disconnected) + { + /* If the state is 'Disconnected', the disconnection message was already logged */ + resp = NULL; + } else if (rc == -1) { - neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: psql end of copy data: %s", pchomp(PQerrorMessage(pageserver_conn))); + char *msg = pchomp(PQerrorMessage(pageserver_conn)); + + neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: psql end of copy data: %s", msg); + pfree(msg); pageserver_disconnect(shard_no); resp = NULL; } @@ -1261,6 +1332,26 @@ pg_init_libpagestore(void) 0, /* no flags required */ NULL, NULL, NULL); + DefineCustomIntVariable("neon.pageserver_response_log_timeout", + "pageserver response log timeout", + "If the pageserver doesn't respond to a request within this timeout," + "a message is printed to the log.", + &pageserver_response_log_timeout, + 10000, 100, INT_MAX, + PGC_SUSET, + GUC_UNIT_MS, + NULL, NULL, NULL); + + DefineCustomIntVariable("neon.pageserver_response_disconnect_timeout", + "pageserver response diconnect timeout", + "If the pageserver doesn't respond to a request within this timeout," + "disconnect and reconnect.", + &pageserver_response_disconnect_timeout, + 120000, 100, 
INT_MAX, + PGC_SUSET, + GUC_UNIT_MS, + NULL, NULL, NULL); + relsize_hash_init(); if (page_server != NULL) diff --git a/test_runner/regress/test_bad_connection.py b/test_runner/regress/test_bad_connection.py index c0c9537421..bfc5cb174e 100644 --- a/test_runner/regress/test_bad_connection.py +++ b/test_runner/regress/test_bad_connection.py @@ -7,6 +7,7 @@ import psycopg2.errors import pytest from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder +from fixtures.utils import USE_LFC @pytest.mark.timeout(600) @@ -80,3 +81,193 @@ def test_compute_pageserver_connection_stress(neon_env_builder: NeonEnvBuilder): # do a graceful shutdown which would had caught the allowed_errors before # https://github.com/neondatabase/neon/pull/8632 env.pageserver.stop() + + +def test_compute_pageserver_hung_connections(neon_env_builder: NeonEnvBuilder): + """ + Test timeouts in waiting for response to pageserver request + """ + env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*slow GetPage.*") + pageserver_http = env.pageserver.http_client() + endpoint = env.endpoints.create_start( + "main", + tenant_id=env.initial_tenant, + config_lines=["autovacuum = off"], + ) + pg_conn = endpoint.connect() + cur = pg_conn.cursor() + + # Create table, and insert some rows. Make it big enough that it doesn't fit in + # shared_buffers, otherwise the SELECT after restart will just return answer + # from shared_buffers without hitting the page server, which defeats the point + # of this test. 
+ cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + + # Verify that the table is larger than shared_buffers + cur.execute( + """ + select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size + from pg_settings where name = 'shared_buffers' + """ + ) + row = cur.fetchone() + assert row is not None + log.debug(f"shared_buffers is {row[0]}, table size {row[1]}") + assert int(row[0]) < int(row[1]) + + # Print the backend PID so that it can be compared with the logs easily + cur.execute("SELECT pg_backend_pid()") + row = cur.fetchone() + assert row is not None + log.info(f"running test workload in backend PID {row[0]}") + + def run_workload(duration: float): + end_time = time.time() + duration + times_executed = 0 + while time.time() < end_time: + if random.random() < 0.5: + cur.execute("INSERT INTO foo VALUES ('stas'), ('heikki')") + else: + cur.execute("SELECT t FROM foo ORDER BY RANDOM() LIMIT 10") + cur.fetchall() + times_executed += 1 + log.info(f"Workload executed {times_executed} times") + assert times_executed > 0 + + ## Test short connection hiccups + ## + ## This is to exercise the logging timeout. + log.info("running workload with log timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '500ms'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%3*return(3000)")) + run_workload(20) + + # check that the message was logged + assert endpoint.log_contains("no response received from pageserver for .* s, still waiting") + assert endpoint.log_contains("received response from pageserver after .* s") + + ## Test connections that are hung for longer + ## + ## This exercises the disconnect timeout. We'll disconnect and + ## reconnect after 500 ms. 
+ log.info("running workload with disconnect timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '250ms'") + cur.execute("SET neon.pageserver_response_disconnect_timeout = '500ms'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%3*return(3000)")) + run_workload(15) + + assert endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # do a graceful shutdown which would had caught the allowed_errors before + # https://github.com/neondatabase/neon/pull/8632 + env.pageserver.stop() + + +def test_compute_pageserver_statement_timeout(neon_env_builder: NeonEnvBuilder): + """ + Test statement_timeout while waiting for response to pageserver request + """ + env = neon_env_builder.init_start() + env.pageserver.allowed_errors.append(".*slow GetPage.*") + pageserver_http = env.pageserver.http_client() + + # Make sure the shared_buffers and LFC are tiny, to ensure the queries + # hit the storage. Disable autovacuum to make the test more deterministic. + config_lines = [ + "shared_buffers='512kB'", + "autovacuum = off", + ] + if USE_LFC: + config_lines = ["neon.max_file_cache_size = 1MB", "neon.file_cache_size_limit = 1MB"] + endpoint = env.endpoints.create_start( + "main", + tenant_id=env.initial_tenant, + config_lines=config_lines, + ) + pg_conn = endpoint.connect() + cur = pg_conn.cursor() + + # Disable parallel query. Parallel workers open their own pageserver connections, + # which messes up the test logic. + cur.execute("SET max_parallel_workers_per_gather=0") + cur.execute("SET effective_io_concurrency=0") + + # Create table, and insert some rows. Make it big enough that it doesn't fit in + # shared_buffers, otherwise the SELECT after restart will just return answer + # from shared_buffers without hitting the page server, which defeats the point + # of this test. 
+ cur.execute("CREATE TABLE foo (t text)") + cur.execute( + """ + INSERT INTO foo + SELECT 'long string to consume some space' || g + FROM generate_series(1, 100000) g + """ + ) + + # Verify that the table is larger than shared_buffers + cur.execute( + """ + select setting::int * pg_size_bytes(unit) as shared_buffers, pg_relation_size('foo') as tbl_size + from pg_settings where name = 'shared_buffers' + """ + ) + row = cur.fetchone() + assert row is not None + log.debug(f"shared_buffers is {row[0]}, table size {row[1]}") + assert int(row[0]) < int(row[1]) + + ## Run a query until the compute->pageserver connection hits the failpoint and + ## get stuck. This tests that the statement_timeout is obeyed while waiting on a + ## GetPage request. + log.info("running workload with statement_timeout") + cur.execute("SET neon.pageserver_response_log_timeout = '2000ms'") + cur.execute("SET neon.pageserver_response_disconnect_timeout = '30000ms'") + cur.execute("SET statement_timeout='10s'") + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "10%return(60000)")) + + start_time = time.time() + with pytest.raises(psycopg2.errors.QueryCanceled): + cur.execute("SELECT count(*) FROM foo") + cur.fetchall() + log.info("Statement timeout reached") + end_time = time.time() + # Verify that the statement_timeout canceled the query before + # neon.pageserver_response_disconnect_timeout expired + assert end_time - start_time < 40 + times_canceled = 1 + + # Should not have disconnected yet + assert not endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # Clear the failpoint. This doesn't affect the connection that already hit it. It + # will keep waiting. But subsequent connections will work normally. + pageserver_http.configure_failpoints(("before-pagestream-msg-flush", "off")) + + # If we keep retrying, we should eventually succeed. 
(This tests that the + # neon.pageserver_response_disconnect_timeout is not reset on query + # cancellation.) + while times_canceled < 10: + try: + cur.execute("SELECT count(*) FROM foo") + cur.fetchall() + log.info("Statement succeeded") + break + except psycopg2.errors.QueryCanceled: + log.info("Statement timed out, retrying") + times_canceled += 1 + assert times_canceled > 1 and times_canceled < 10 + + assert endpoint.log_contains("no response from pageserver for .* s, disconnecting") + + # do a graceful shutdown which would had caught the allowed_errors before + # https://github.com/neondatabase/neon/pull/8632 + env.pageserver.stop() From 6621be6b7b5c860739d611096c248f6ec44d6fb4 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 24 Feb 2025 23:01:14 +0100 Subject: [PATCH 004/207] pageserver: tweak slow GetPage logging (#10956) ## Problem We recently added slow GetPage request logging. However, this unintentionally included the flush time when logging (which we already have separate logging for). It also logs at WARN level, which is a bit aggressive since we see this fire quite frequently. Follows https://github.com/neondatabase/neon/pull/10906. ## Summary of changes * Only log the request execution time, not the flush time. * Extract a `pagestream_dispatch_batched_message()` helper. * Rename `warn_slow()` to `log_slow()` and downgrade to INFO. --- libs/utils/benches/README.md | 4 +- libs/utils/benches/benchmarks.rs | 12 +- libs/utils/src/logging.rs | 12 +- pageserver/src/page_service.rs | 298 ++++++++++++++++--------------- 4 files changed, 170 insertions(+), 156 deletions(-) diff --git a/libs/utils/benches/README.md b/libs/utils/benches/README.md index e23ec268c2..5afbe3cf2b 100644 --- a/libs/utils/benches/README.md +++ b/libs/utils/benches/README.md @@ -10,14 +10,14 @@ cargo bench --package utils cargo bench --package utils --bench benchmarks # Specific benchmark. 
-cargo bench --package utils --bench benchmarks warn_slow/enabled=true +cargo bench --package utils --bench benchmarks log_slow/enabled=true # List available benchmarks. cargo bench --package utils --benches -- --list # Generate flamegraph profiles using pprof-rs, profiling for 10 seconds. # Output in target/criterion/*/profile/flamegraph.svg. -cargo bench --package utils --bench benchmarks warn_slow/enabled=true --profile-time 10 +cargo bench --package utils --bench benchmarks log_slow/enabled=true --profile-time 10 ``` Additional charts and statistics are available in `target/criterion/report/index.html`. diff --git a/libs/utils/benches/benchmarks.rs b/libs/utils/benches/benchmarks.rs index cff3792f3a..348e27ac47 100644 --- a/libs/utils/benches/benchmarks.rs +++ b/libs/utils/benches/benchmarks.rs @@ -3,14 +3,14 @@ use std::time::Duration; use criterion::{criterion_group, criterion_main, Bencher, Criterion}; use pprof::criterion::{Output, PProfProfiler}; use utils::id; -use utils::logging::warn_slow; +use utils::logging::log_slow; // Register benchmarks with Criterion. criterion_group!( name = benches; config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); targets = bench_id_stringify, - bench_warn_slow, + bench_log_slow, ); criterion_main!(benches); @@ -29,9 +29,9 @@ pub fn bench_id_stringify(c: &mut Criterion) { }); } -pub fn bench_warn_slow(c: &mut Criterion) { +pub fn bench_log_slow(c: &mut Criterion) { for enabled in [false, true] { - c.bench_function(&format!("warn_slow/enabled={enabled}"), |b| { + c.bench_function(&format!("log_slow/enabled={enabled}"), |b| { run_bench(b, enabled).unwrap() }); } @@ -45,11 +45,11 @@ pub fn bench_warn_slow(c: &mut Criterion) { .enable_all() .build()?; - // Test both with and without warn_slow, since we're essentially measuring Tokio scheduling + // Test both with and without log_slow, since we're essentially measuring Tokio scheduling // performance too. 
Use a simple noop future that yields once, to avoid any scheduler fast // paths for a ready future. if enabled { - b.iter(|| runtime.block_on(warn_slow("ready", THRESHOLD, tokio::task::yield_now()))); + b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now()))); } else { b.iter(|| runtime.block_on(tokio::task::yield_now())); } diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 95c69ac8ba..2c36942f43 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -7,7 +7,7 @@ use metrics::{IntCounter, IntCounterVec}; use once_cell::sync::Lazy; use strum_macros::{EnumString, VariantNames}; use tokio::time::Instant; -use tracing::warn; +use tracing::info; /// Logs a critical error, similarly to `tracing::error!`. This will: /// @@ -322,11 +322,13 @@ impl std::fmt::Debug for SecretString { } } -/// Logs a periodic warning if a future is slow to complete. +/// Logs a periodic message if a future is slow to complete. /// /// This is performance-sensitive as it's used on the GetPage read path. +/// +/// TODO: consider upgrading this to a warning, but currently it fires too often. #[inline] -pub async fn warn_slow(name: &str, threshold: Duration, f: impl Future) -> O { +pub async fn log_slow(name: &str, threshold: Duration, f: impl Future) -> O { // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and // won't fit on the stack. 
let mut f = Box::pin(f); @@ -345,13 +347,13 @@ pub async fn warn_slow(name: &str, threshold: Duration, f: impl Future= threshold { - warn!("slow {name} completed after {:.3}s", elapsed.as_secs_f64()); + info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64()); } return output; } let elapsed = started.elapsed().as_secs_f64(); - warn!("slow {name} still running after {elapsed:.3}s",); + info!("slow {name} still running after {elapsed:.3}s",); attempt += 1; } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index cab3d76bf8..668f0eee36 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -40,7 +40,7 @@ use tokio::io::{AsyncWriteExt, BufWriter}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::logging::warn_slow; +use utils::logging::log_slow; use utils::sync::gate::{Gate, GateGuard}; use utils::sync::spsc_fold; use utils::{ @@ -84,8 +84,8 @@ use std::os::fd::AsRawFd; /// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`]. const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000); -/// Threshold at which to log a warning about slow GetPage requests. -const WARN_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30); +/// Threshold at which to log slow GetPage requests. +const LOG_SLOW_GETPAGE_THRESHOLD: Duration = Duration::from_secs(30); /////////////////////////////////////////////////////////////////////////////// @@ -1087,11 +1087,147 @@ impl PageServerHandler { batch }; - // invoke handler function - let (mut handler_results, span): ( + // Dispatch the batch to the appropriate request handler. + let (mut handler_results, span) = log_slow( + batch.as_static_str(), + LOG_SLOW_GETPAGE_THRESHOLD, + self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx), + ) + .await?; + + // We purposefully don't count flush time into the smgr operation timer. 
+ // + // The reason is that current compute client will not perform protocol processing + // if the postgres backend process is doing things other than `->smgr_read()`. + // This is especially the case for prefetch. + // + // If the compute doesn't read from the connection, eventually TCP will backpressure + // all the way into our flush call below. + // + // The timer's underlying metric is used for a storage-internal latency SLO and + // we don't want to include latency in it that we can't control. + // And as pointed out above, in this case, we don't control the time that flush will take. + // + // We put each response in the batch onto the wire in a separate pgb_writer.flush() + // call, which (all unmeasured) adds syscall overhead but reduces time to first byte + // and avoids building up a "giant" contiguous userspace buffer to hold the entire response. + // TODO: vectored socket IO would be great, but pgb_writer doesn't support that. + let flush_timers = { + let flushing_start_time = Instant::now(); + let mut flush_timers = Vec::with_capacity(handler_results.len()); + for handler_result in &mut handler_results { + let flush_timer = match handler_result { + Ok((_, timer)) => Some( + timer + .observe_execution_end(flushing_start_time) + .expect("we are the first caller"), + ), + Err(_) => { + // TODO: measure errors + None + } + }; + flush_timers.push(flush_timer); + } + assert_eq!(flush_timers.len(), handler_results.len()); + flush_timers + }; + + // Map handler result to protocol behavior. + // Some handler errors cause exit from pagestream protocol. + // Other handler errors are sent back as an error message and we stay in pagestream protocol. 
+ for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) { + let response_msg = match handler_result { + Err(e) => match &e.err { + PageStreamError::Shutdown => { + // If we fail to fulfil a request during shutdown, which may be _because_ of + // shutdown, then do not send the error to the client. Instead just drop the + // connection. + span.in_scope(|| info!("dropping connection due to shutdown")); + return Err(QueryError::Shutdown); + } + PageStreamError::Reconnect(reason) => { + span.in_scope(|| info!("handler requested reconnect: {reason}")); + return Err(QueryError::Reconnect); + } + PageStreamError::Read(_) + | PageStreamError::LsnTimeout(_) + | PageStreamError::NotFound(_) + | PageStreamError::BadRequest(_) => { + // print the all details to the log with {:#}, but for the client the + // error message is enough. Do not log if shutting down, as the anyhow::Error + // here includes cancellation which is not an error. + let full = utils::error::report_compact_sources(&e.err); + span.in_scope(|| { + error!("error reading relation or page version: {full:#}") + }); + + PagestreamBeMessage::Error(PagestreamErrorResponse { + req: e.req, + message: e.err.to_string(), + }) + } + }, + Ok((response_msg, _op_timer_already_observed)) => response_msg, + }; + + // + // marshal & transmit response message + // + + pgb_writer.write_message_noflush(&BeMessage::CopyData( + &response_msg.serialize(protocol_version), + ))?; + + failpoint_support::sleep_millis_async!("before-pagestream-msg-flush", cancel); + + // what we want to do + let socket_fd = pgb_writer.socket_fd; + let flush_fut = pgb_writer.flush(); + // metric for how long flushing takes + let flush_fut = match flushing_timer { + Some(flushing_timer) => futures::future::Either::Left(flushing_timer.measure( + Instant::now(), + flush_fut, + socket_fd, + )), + None => futures::future::Either::Right(flush_fut), + }; + // do it while respecting cancellation + let _: () = async move { + 
tokio::select! { + biased; + _ = cancel.cancelled() => { + // We were requested to shut down. + info!("shutdown request received in page handler"); + return Err(QueryError::Shutdown) + } + res = flush_fut => { + res?; + } + } + Ok(()) + } + .await?; + } + Ok(()) + } + + /// Helper which dispatches a batched message to the appropriate handler. + /// Returns a vec of results, along with the extracted trace span. + async fn pagestream_dispatch_batched_message( + &mut self, + batch: BatchedFeMessage, + io_concurrency: IoConcurrency, + ctx: &RequestContext, + ) -> Result< + ( Vec>, - _, - ) = match batch { + Span, + ), + QueryError, + > { + Ok(match batch { BatchedFeMessage::Exists { span, timer, @@ -1213,124 +1349,7 @@ impl PageServerHandler { // call the handler. (vec![Err(error)], span) } - }; - - // We purposefully don't count flush time into the smgr operation timer. - // - // The reason is that current compute client will not perform protocol processing - // if the postgres backend process is doing things other than `->smgr_read()`. - // This is especially the case for prefetch. - // - // If the compute doesn't read from the connection, eventually TCP will backpressure - // all the way into our flush call below. - // - // The timer's underlying metric is used for a storage-internal latency SLO and - // we don't want to include latency in it that we can't control. - // And as pointed out above, in this case, we don't control the time that flush will take. - // - // We put each response in the batch onto the wire in a separate pgb_writer.flush() - // call, which (all unmeasured) adds syscall overhead but reduces time to first byte - // and avoids building up a "giant" contiguous userspace buffer to hold the entire response. - // TODO: vectored socket IO would be great, but pgb_writer doesn't support that. 
- let flush_timers = { - let flushing_start_time = Instant::now(); - let mut flush_timers = Vec::with_capacity(handler_results.len()); - for handler_result in &mut handler_results { - let flush_timer = match handler_result { - Ok((_, timer)) => Some( - timer - .observe_execution_end(flushing_start_time) - .expect("we are the first caller"), - ), - Err(_) => { - // TODO: measure errors - None - } - }; - flush_timers.push(flush_timer); - } - assert_eq!(flush_timers.len(), handler_results.len()); - flush_timers - }; - - // Map handler result to protocol behavior. - // Some handler errors cause exit from pagestream protocol. - // Other handler errors are sent back as an error message and we stay in pagestream protocol. - for (handler_result, flushing_timer) in handler_results.into_iter().zip(flush_timers) { - let response_msg = match handler_result { - Err(e) => match &e.err { - PageStreamError::Shutdown => { - // If we fail to fulfil a request during shutdown, which may be _because_ of - // shutdown, then do not send the error to the client. Instead just drop the - // connection. - span.in_scope(|| info!("dropping connection due to shutdown")); - return Err(QueryError::Shutdown); - } - PageStreamError::Reconnect(reason) => { - span.in_scope(|| info!("handler requested reconnect: {reason}")); - return Err(QueryError::Reconnect); - } - PageStreamError::Read(_) - | PageStreamError::LsnTimeout(_) - | PageStreamError::NotFound(_) - | PageStreamError::BadRequest(_) => { - // print the all details to the log with {:#}, but for the client the - // error message is enough. Do not log if shutting down, as the anyhow::Error - // here includes cancellation which is not an error. 
- let full = utils::error::report_compact_sources(&e.err); - span.in_scope(|| { - error!("error reading relation or page version: {full:#}") - }); - - PagestreamBeMessage::Error(PagestreamErrorResponse { - req: e.req, - message: e.err.to_string(), - }) - } - }, - Ok((response_msg, _op_timer_already_observed)) => response_msg, - }; - - // - // marshal & transmit response message - // - - pgb_writer.write_message_noflush(&BeMessage::CopyData( - &response_msg.serialize(protocol_version), - ))?; - - failpoint_support::sleep_millis_async!("before-pagestream-msg-flush", cancel); - - // what we want to do - let socket_fd = pgb_writer.socket_fd; - let flush_fut = pgb_writer.flush(); - // metric for how long flushing takes - let flush_fut = match flushing_timer { - Some(flushing_timer) => futures::future::Either::Left(flushing_timer.measure( - Instant::now(), - flush_fut, - socket_fd, - )), - None => futures::future::Either::Right(flush_fut), - }; - // do it while respecting cancellation - let _: () = async move { - tokio::select! { - biased; - _ = cancel.cancelled() => { - // We were requested to shut down. - info!("shutdown request received in page handler"); - return Err(QueryError::Shutdown) - } - res = flush_fut => { - res?; - } - } - Ok(()) - } - .await?; - } - Ok(()) + }) } /// Pagestream sub-protocol handler. 
@@ -1476,19 +1495,16 @@ impl PageServerHandler { } }; - let result = warn_slow( - msg.as_static_str(), - WARN_SLOW_GETPAGE_THRESHOLD, - self.pagesteam_handle_batched_message( + let result = self + .pagesteam_handle_batched_message( pgb_writer, msg, io_concurrency.clone(), &cancel, protocol_version, ctx, - ), - ) - .await; + ) + .await; match result { Ok(()) => {} Err(e) => break e, @@ -1652,17 +1668,13 @@ impl PageServerHandler { return Err(e); } }; - warn_slow( - batch.as_static_str(), - WARN_SLOW_GETPAGE_THRESHOLD, - self.pagesteam_handle_batched_message( - pgb_writer, - batch, - io_concurrency.clone(), - &cancel, - protocol_version, - &ctx, - ), + self.pagesteam_handle_batched_message( + pgb_writer, + batch, + io_concurrency.clone(), + &cancel, + protocol_version, + &ctx, ) .await?; } From 5d17640944b529834158c689cf0e821871dbf344 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 25 Feb 2025 09:33:08 +0000 Subject: [PATCH 005/207] storcon: send heartbeats concurrently (#10954) ## Problem While looking at logs I noticed that heartbeats are sent sequentially. The loop polling the UnorderedSet is at the wrong level of indentation. Instead of doing it after we have the full set, we did it after each entry. ## Summary of Changes Poll the UnorderedSet properly. --- storage_controller/src/heartbeater.rs | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs index 52b6110667..88ee7887d3 100644 --- a/storage_controller/src/heartbeater.rs +++ b/storage_controller/src/heartbeater.rs @@ -223,21 +223,21 @@ impl HeartBeat for HeartbeaterTask Some((*node_id, status)) } }); + } - loop { - let maybe_status = tokio::select! { - next = heartbeat_futs.next() => { - match next { - Some(result) => result, - None => { break; } - } - }, - _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } - }; + loop { + let maybe_status = tokio::select!
{ + next = heartbeat_futs.next() => { + match next { + Some(result) => result, + None => { break; } + } + }, + _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } + }; - if let Some((node_id, status)) = maybe_status { - new_state.insert(node_id, status); - } + if let Some((node_id, status)) = maybe_status { + new_state.insert(node_id, status); } } @@ -363,21 +363,21 @@ impl HeartBeat for HeartbeaterTask { - match next { - Some(result) => result, - None => { break; } - } - }, - _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } - }; + loop { + let maybe_status = tokio::select! { + next = heartbeat_futs.next() => { + match next { + Some(result) => result, + None => { break; } + } + }, + _ = self.cancel.cancelled() => { return Err(HeartbeaterError::Cancel); } + }; - if let Some((node_id, status)) = maybe_status { - new_state.insert(node_id, status); - } + if let Some((node_id, status)) = maybe_status { + new_state.insert(node_id, status); } } From 0d9a45a4753b124e47aeabf24986b7b9e1e2f4b7 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 25 Feb 2025 10:21:35 +0000 Subject: [PATCH 006/207] safekeeper: invalidate start of interpreted batch on reader resets (#10951) ## Problem The interpreted WAL reader tracks the start of the current logical batch. This needs to be invalidated when the reader is reset. This bug caused a couple of WAL gap alerts in staging. 
## Summary of changes * Refactor to make it possible to write a reproducer * Add repro unit test * Fix by resetting the start with the reader Related https://github.com/neondatabase/cloud/issues/23935 --- safekeeper/src/send_interpreted_wal.rs | 247 ++++++++++++++++++++++--- safekeeper/src/send_wal.rs | 5 +- 2 files changed, 221 insertions(+), 31 deletions(-) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index fb06339604..0662bb9518 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -100,7 +100,12 @@ struct ShardSenderState { /// State of [`InterpretedWalReader`] visible outside of the task running it. #[derive(Debug)] pub(crate) enum InterpretedWalReaderState { - Running { current_position: Lsn }, + Running { + current_position: Lsn, + /// Tracks the start of the PG WAL LSN from which the current batch of + /// interpreted records originated. + current_batch_wal_start: Option, + }, Done, } @@ -122,14 +127,21 @@ pub enum InterpretedWalReaderError { } enum CurrentPositionUpdate { - Reset(Lsn), + Reset { from: Lsn, to: Lsn }, NotReset(Lsn), } impl CurrentPositionUpdate { fn current_position(&self) -> Lsn { match self { - CurrentPositionUpdate::Reset(lsn) => *lsn, + CurrentPositionUpdate::Reset { from: _, to } => *to, + CurrentPositionUpdate::NotReset(lsn) => *lsn, + } + } + + fn previous_position(&self) -> Lsn { + match self { + CurrentPositionUpdate::Reset { from, to: _ } => *from, CurrentPositionUpdate::NotReset(lsn) => *lsn, } } @@ -145,16 +157,33 @@ impl InterpretedWalReaderState { } } + #[cfg(test)] + fn current_batch_wal_start(&self) -> Option { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. + } => *current_batch_wal_start, + InterpretedWalReaderState::Done => None, + } + } + // Reset the current position of the WAL reader if the requested starting position // of the new shard is smaller than the current value. 
fn maybe_reset(&mut self, new_shard_start_pos: Lsn) -> CurrentPositionUpdate { match self { InterpretedWalReaderState::Running { - current_position, .. + current_position, + current_batch_wal_start, } => { if new_shard_start_pos < *current_position { + let from = *current_position; *current_position = new_shard_start_pos; - CurrentPositionUpdate::Reset(*current_position) + *current_batch_wal_start = None; + CurrentPositionUpdate::Reset { + from, + to: *current_position, + } } else { CurrentPositionUpdate::NotReset(*current_position) } @@ -164,6 +193,47 @@ impl InterpretedWalReaderState { } } } + + fn update_current_batch_wal_start(&mut self, lsn: Lsn) { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. + } => { + if current_batch_wal_start.is_none() { + *current_batch_wal_start = Some(lsn); + } + } + InterpretedWalReaderState::Done => { + panic!("update_current_batch_wal_start called on finished reader") + } + } + } + + fn take_current_batch_wal_start(&mut self) -> Lsn { + match self { + InterpretedWalReaderState::Running { + current_batch_wal_start, + .. + } => current_batch_wal_start.take().unwrap(), + InterpretedWalReaderState::Done => { + panic!("take_current_batch_wal_start called on finished reader") + } + } + } + + fn update_current_position(&mut self, lsn: Lsn) { + match self { + InterpretedWalReaderState::Running { + current_position, .. 
+ } => { + *current_position = lsn; + } + InterpretedWalReaderState::Done => { + panic!("update_current_position called on finished reader") + } + } + } } pub(crate) struct AttachShardNotification { @@ -184,6 +254,7 @@ impl InterpretedWalReader { ) -> InterpretedWalReaderHandle { let state = Arc::new(std::sync::RwLock::new(InterpretedWalReaderState::Running { current_position: start_pos, + current_batch_wal_start: None, })); let (shard_notification_tx, shard_notification_rx) = tokio::sync::mpsc::unbounded_channel(); @@ -237,9 +308,13 @@ impl InterpretedWalReader { tx: tokio::sync::mpsc::Sender, shard: ShardIdentity, pg_version: u32, + shard_notification_rx: Option< + tokio::sync::mpsc::UnboundedReceiver, + >, ) -> InterpretedWalReader { let state = Arc::new(std::sync::RwLock::new(InterpretedWalReaderState::Running { current_position: start_pos, + current_batch_wal_start: None, })); InterpretedWalReader { @@ -252,7 +327,7 @@ impl InterpretedWalReader { next_record_lsn: start_pos, }], )]), - shard_notification_rx: None, + shard_notification_rx, state: state.clone(), pg_version, } @@ -295,10 +370,6 @@ impl InterpretedWalReader { let mut wal_decoder = WalStreamDecoder::new(start_pos, self.pg_version); - // Tracks the start of the PG WAL LSN from which the current batch of - // interpreted records originated. - let mut current_batch_wal_start_lsn: Option = None; - loop { tokio::select! { // Main branch for reading WAL and forwarding it @@ -319,11 +390,7 @@ impl InterpretedWalReader { } }; - // We will already have a value if the previous chunks of WAL - // did not decode into anything useful. - if current_batch_wal_start_lsn.is_none() { - current_batch_wal_start_lsn = Some(wal_start_lsn); - } + self.state.write().unwrap().update_current_batch_wal_start(wal_start_lsn); wal_decoder.feed_bytes(&wal); @@ -380,16 +447,11 @@ impl InterpretedWalReader { // Update the current position such that new receivers can decide // whether to attach to us or spawn a new WAL reader. 
- match &mut *self.state.write().unwrap() { - InterpretedWalReaderState::Running { current_position, .. } => { - *current_position = max_next_record_lsn; - }, - InterpretedWalReaderState::Done => { - unreachable!() - } - } - - let batch_wal_start_lsn = current_batch_wal_start_lsn.take().unwrap(); + let batch_wal_start_lsn = { + let mut guard = self.state.write().unwrap(); + guard.update_current_position(max_next_record_lsn); + guard.take_current_batch_wal_start() + }; // Send interpreted records downstream. Anything that has already been seen // by a shard is filtered out. @@ -480,7 +542,7 @@ impl InterpretedWalReader { // anything outside the select statement. let position_reset = self.state.write().unwrap().maybe_reset(start_pos); match position_reset { - CurrentPositionUpdate::Reset(to) => { + CurrentPositionUpdate::Reset { from: _, to } => { self.wal_stream.reset(to).await; wal_decoder = WalStreamDecoder::new(to, self.pg_version); }, @@ -488,14 +550,22 @@ impl InterpretedWalReader { }; tracing::info!( - "Added shard sender {} with start_pos={} current_pos={}", - ShardSenderId::new(shard_id, new_sender_id), start_pos, position_reset.current_position() + "Added shard sender {} with start_pos={} previous_pos={} current_pos={}", + ShardSenderId::new(shard_id, new_sender_id), + start_pos, + position_reset.previous_position(), + position_reset.current_position(), ); } } } } } + + #[cfg(test)] + fn state(&self) -> Arc> { + self.state.clone() + } } impl InterpretedWalReaderHandle { @@ -633,7 +703,7 @@ mod tests { }; use crate::{ - send_interpreted_wal::{Batch, InterpretedWalReader}, + send_interpreted_wal::{AttachShardNotification, Batch, InterpretedWalReader}, test_utils::Env, wal_reader_stream::StreamingWalReader, }; @@ -913,4 +983,123 @@ mod tests { assert_eq!(sender.received_next_record_lsns, expected); } } + + #[tokio::test] + async fn test_batch_start_tracking_on_reset() { + // When the WAL stream is reset to an older LSN, + // the current batch start LSN should 
be invalidated. + // This test constructs such a scenario: + // 1. Shard 0 is reading somewhere ahead + // 2. Reader reads some WAL, but does not decode a full record (partial read) + // 3. Shard 1 attaches to the reader and resets it to an older LSN + // 4. Shard 1 should get the correct batch WAL start LSN + let _ = env_logger::builder().is_test(true).try_init(); + + const SIZE: usize = 64 * 1024; + const MSG_COUNT: usize = 10; + const PG_VERSION: u32 = 17; + const SHARD_COUNT: u8 = 2; + const WAL_READER_BATCH_SIZE: usize = 8192; + + let start_lsn = Lsn::from_str("0/149FD18").unwrap(); + let shard_0_start_lsn = Lsn::from_str("0/14AFE10").unwrap(); + let env = Env::new(true).unwrap(); + let tli = env + .make_timeline(NodeId(1), TenantTimelineId::generate(), start_lsn) + .await + .unwrap(); + + let resident_tli = tli.wal_residence_guard().await.unwrap(); + let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) + .await + .unwrap(); + let end_pos = end_watch.get(); + + let streaming_wal_reader = StreamingWalReader::new( + resident_tli, + None, + shard_0_start_lsn, + end_pos, + end_watch, + WAL_READER_BATCH_SIZE, + ); + + let shard_0 = ShardIdentity::new( + ShardNumber(0), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + + let shard_1 = ShardIdentity::new( + ShardNumber(1), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + + let mut shards = HashMap::new(); + + for shard_number in 0..SHARD_COUNT { + let shard_id = ShardIdentity::new( + ShardNumber(shard_number), + ShardCount(SHARD_COUNT), + ShardStripeSize::default(), + ) + .unwrap(); + let (tx, rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); + shards.insert(shard_id, (Some(tx), Some(rx))); + } + + let shard_0_tx = shards.get_mut(&shard_0).unwrap().0.take().unwrap(); + + let (shard_notification_tx, shard_notification_rx) = tokio::sync::mpsc::unbounded_channel(); + + let reader = InterpretedWalReader::new( + streaming_wal_reader, + 
shard_0_start_lsn, + shard_0_tx, + shard_0, + PG_VERSION, + Some(shard_notification_rx), + ); + + let reader_state = reader.state(); + let mut reader_fut = std::pin::pin!(reader.run(start_lsn, &None)); + loop { + let poll = futures::poll!(reader_fut.as_mut()); + assert!(poll.is_pending()); + + let guard = reader_state.read().unwrap(); + if guard.current_batch_wal_start().is_some() { + break; + } + } + + shard_notification_tx + .send(AttachShardNotification { + shard_id: shard_1, + sender: shards.get_mut(&shard_1).unwrap().0.take().unwrap(), + start_pos: start_lsn, + }) + .unwrap(); + + let mut shard_1_rx = shards.get_mut(&shard_1).unwrap().1.take().unwrap(); + loop { + let poll = futures::poll!(reader_fut.as_mut()); + assert!(poll.is_pending()); + + let try_recv_res = shard_1_rx.try_recv(); + match try_recv_res { + Ok(batch) => { + assert_eq!(batch.records.raw_wal_start_lsn.unwrap(), start_lsn); + break; + } + Err(tokio::sync::mpsc::error::TryRecvError::Empty) => {} + Err(tokio::sync::mpsc::error::TryRecvError::Disconnected) => { + unreachable!(); + } + } + } + } } diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 4a4a74a0fd..72b1fd9fc3 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -624,8 +624,9 @@ impl SafekeeperPostgresHandler { MAX_SEND_SIZE, ); - let reader = - InterpretedWalReader::new(wal_reader, start_pos, tx, shard, pg_version); + let reader = InterpretedWalReader::new( + wal_reader, start_pos, tx, shard, pg_version, None, + ); let sender = InterpretedWalSender { format, From 758f5972807083987ca0c7977a1ef237bf642d73 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Tue, 25 Feb 2025 14:56:05 +0300 Subject: [PATCH 007/207] compute <-> sk protocol v3 (#10647) ## Problem As part of https://github.com/neondatabase/neon/issues/8614 we need to pass membership configurations between compute and safekeepers. ## Summary of changes Add version 3 of the protocol carrying membership configurations. 
The greeting message on both sides carries the full conf; other messages carry only the generation number. Use the protocol bump to include other accumulated changes: - stop packing whole structs on the wire as is; - make the tag u8 instead of u64; - send all ints in network order; - drop proposer_uuid, we can pass it in START_WAL_PUSH and it wasn't very useful anyway. Per message changes, apart from mconf: - ProposerGreeting: tenant / timeline id is now sent as a hex cstring. Remove proto version, it is passed outside in START_WAL_PUSH. Remove postgres timeline, it is unused. Reorder fields a bit. - AcceptorGreeting: reorder fields - VoteResponse: timeline_start_lsn is removed. It can be taken from the first member of term history, and later we won't need it at all once all timelines are explicitly created. Vote itself is u8 instead of u64. - ProposerElected: timeline_start_lsn is removed for the same reasons. - AppendRequest: epoch_start_lsn is removed, it is known from term history in ProposerElected. Both compute and sk are able to talk v2 and v3 to make rollbacks (in case we need them) easier; the neon.safekeeper_proto_version GUC sets the client version. v2 code can be dropped later. So far an empty conf is passed everywhere; future PRs will handle it. To test, add a param to some tests choosing the proto version; we want to test both 2 and 3 until we fully migrate.
ref https://github.com/neondatabase/neon/issues/10326 --------- Co-authored-by: Arthur Petukhovsky --- libs/safekeeper_api/src/membership.rs | 18 +- libs/walproposer/src/walproposer.rs | 68 +- pgxn/neon/neon_utils.c | 20 + pgxn/neon/neon_utils.h | 2 + pgxn/neon/walproposer.c | 671 ++++++++++++----- pgxn/neon/walproposer.h | 133 +++- pgxn/neon/walproposer_compat.c | 46 +- pgxn/neon/walproposer_pg.c | 56 +- safekeeper/benches/receive_wal.rs | 10 +- safekeeper/src/json_ctrl.rs | 7 +- safekeeper/src/receive_wal.rs | 9 +- safekeeper/src/recovery.rs | 11 +- safekeeper/src/safekeeper.rs | 708 +++++++++++++----- safekeeper/src/test_utils.rs | 6 +- .../tests/walproposer_sim/safekeeper.rs | 8 +- test_runner/regress/test_normal_work.py | 20 +- .../regress/test_wal_acceptor_async.py | 14 +- 17 files changed, 1355 insertions(+), 452 deletions(-) diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs index 8b14a4f290..2f20ec5f94 100644 --- a/libs/safekeeper_api/src/membership.rs +++ b/libs/safekeeper_api/src/membership.rs @@ -68,14 +68,12 @@ impl Display for SafekeeperId { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(transparent)] pub struct MemberSet { - pub members: Vec, + pub m: Vec, } impl MemberSet { pub fn empty() -> Self { - MemberSet { - members: Vec::new(), - } + MemberSet { m: Vec::new() } } pub fn new(members: Vec) -> anyhow::Result { @@ -83,11 +81,11 @@ impl MemberSet { if hs.len() != members.len() { bail!("duplicate safekeeper id in the set {:?}", members); } - Ok(MemberSet { members }) + Ok(MemberSet { m: members }) } pub fn contains(&self, sk: &SafekeeperId) -> bool { - self.members.iter().any(|m| m.id == sk.id) + self.m.iter().any(|m| m.id == sk.id) } pub fn add(&mut self, sk: SafekeeperId) -> anyhow::Result<()> { @@ -97,7 +95,7 @@ impl MemberSet { sk.id, self )); } - self.members.push(sk); + self.m.push(sk); Ok(()) } } @@ -105,11 +103,7 @@ impl MemberSet { impl Display for MemberSet { /// Display as a 
comma separated list of members. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let sks_str = self - .members - .iter() - .map(|m| m.to_string()) - .collect::>(); + let sks_str = self.m.iter().map(|sk| sk.to_string()).collect::>(); write!(f, "({})", sks_str.join(", ")) } } diff --git a/libs/walproposer/src/walproposer.rs b/libs/walproposer/src/walproposer.rs index ba75171db2..60b606c64a 100644 --- a/libs/walproposer/src/walproposer.rs +++ b/libs/walproposer/src/walproposer.rs @@ -215,6 +215,7 @@ impl Wrapper { syncSafekeepers: config.sync_safekeepers, systemId: 0, pgTimeline: 1, + proto_version: 3, callback_data, }; let c_config = Box::into_raw(Box::new(c_config)); @@ -276,6 +277,7 @@ mod tests { use core::panic; use std::{ cell::Cell, + ffi::CString, sync::{atomic::AtomicUsize, mpsc::sync_channel}, }; @@ -496,57 +498,64 @@ mod tests { // Messages definitions are at walproposer.h // xxx: it would be better to extract them from safekeeper crate and // use serialization/deserialization here. 
- let greeting_tag = (b'g' as u64).to_ne_bytes(); - let proto_version = 2_u32.to_ne_bytes(); - let pg_version: [u8; 4] = PG_VERSION_NUM.to_ne_bytes(); - let proposer_id = [0; 16]; - let system_id = 0_u64.to_ne_bytes(); - let tenant_id = ttid.tenant_id.as_arr(); - let timeline_id = ttid.timeline_id.as_arr(); - let pg_tli = 1_u32.to_ne_bytes(); - let wal_seg_size = 16777216_u32.to_ne_bytes(); + let greeting_tag = (b'g').to_be_bytes(); + let tenant_id = CString::new(ttid.tenant_id.to_string()) + .unwrap() + .into_bytes_with_nul(); + let timeline_id = CString::new(ttid.timeline_id.to_string()) + .unwrap() + .into_bytes_with_nul(); + let mconf_gen = 0_u32.to_be_bytes(); + let mconf_members_len = 0_u32.to_be_bytes(); + let mconf_members_new_len = 0_u32.to_be_bytes(); + let pg_version: [u8; 4] = PG_VERSION_NUM.to_be_bytes(); + let system_id = 0_u64.to_be_bytes(); + let wal_seg_size = 16777216_u32.to_be_bytes(); + let proposer_greeting = [ greeting_tag.as_slice(), - proto_version.as_slice(), - pg_version.as_slice(), - proposer_id.as_slice(), - system_id.as_slice(), tenant_id.as_slice(), timeline_id.as_slice(), - pg_tli.as_slice(), + mconf_gen.as_slice(), + mconf_members_len.as_slice(), + mconf_members_new_len.as_slice(), + pg_version.as_slice(), + system_id.as_slice(), wal_seg_size.as_slice(), ] .concat(); - let voting_tag = (b'v' as u64).to_ne_bytes(); - let vote_request_term = 3_u64.to_ne_bytes(); - let proposer_id = [0; 16]; + let voting_tag = (b'v').to_be_bytes(); + let vote_request_term = 3_u64.to_be_bytes(); let vote_request = [ voting_tag.as_slice(), + mconf_gen.as_slice(), vote_request_term.as_slice(), - proposer_id.as_slice(), ] .concat(); - let acceptor_greeting_term = 2_u64.to_ne_bytes(); - let acceptor_greeting_node_id = 1_u64.to_ne_bytes(); + let acceptor_greeting_term = 2_u64.to_be_bytes(); + let acceptor_greeting_node_id = 1_u64.to_be_bytes(); let acceptor_greeting = [ greeting_tag.as_slice(), - acceptor_greeting_term.as_slice(), 
acceptor_greeting_node_id.as_slice(), + mconf_gen.as_slice(), + mconf_members_len.as_slice(), + mconf_members_new_len.as_slice(), + acceptor_greeting_term.as_slice(), ] .concat(); - let vote_response_term = 3_u64.to_ne_bytes(); - let vote_given = 1_u64.to_ne_bytes(); - let flush_lsn = 0x539_u64.to_ne_bytes(); - let truncate_lsn = 0x539_u64.to_ne_bytes(); - let th_len = 1_u32.to_ne_bytes(); - let th_term = 2_u64.to_ne_bytes(); - let th_lsn = 0x539_u64.to_ne_bytes(); - let timeline_start_lsn = 0x539_u64.to_ne_bytes(); + let vote_response_term = 3_u64.to_be_bytes(); + let vote_given = 1_u8.to_be_bytes(); + let flush_lsn = 0x539_u64.to_be_bytes(); + let truncate_lsn = 0x539_u64.to_be_bytes(); + let th_len = 1_u32.to_be_bytes(); + let th_term = 2_u64.to_be_bytes(); + let th_lsn = 0x539_u64.to_be_bytes(); let vote_response = [ voting_tag.as_slice(), + mconf_gen.as_slice(), vote_response_term.as_slice(), vote_given.as_slice(), flush_lsn.as_slice(), @@ -554,7 +563,6 @@ mod tests { th_len.as_slice(), th_term.as_slice(), th_lsn.as_slice(), - timeline_start_lsn.as_slice(), ] .concat(); diff --git a/pgxn/neon/neon_utils.c b/pgxn/neon/neon_utils.c index 1fb4ed9522..1fad44bd58 100644 --- a/pgxn/neon/neon_utils.c +++ b/pgxn/neon/neon_utils.c @@ -51,6 +51,26 @@ HexDecodeString(uint8 *result, char *input, int nbytes) return true; } +/* -------------------------------- + * pq_getmsgint16 - get a binary 2-byte int from a message buffer + * -------------------------------- + */ +uint16 +pq_getmsgint16(StringInfo msg) +{ + return pq_getmsgint(msg, 2); +} + +/* -------------------------------- + * pq_getmsgint32 - get a binary 4-byte int from a message buffer + * -------------------------------- + */ +uint32 +pq_getmsgint32(StringInfo msg) +{ + return pq_getmsgint(msg, 4); +} + /* -------------------------------- * pq_getmsgint32_le - get a binary 4-byte int from a message buffer in native (LE) order * -------------------------------- diff --git a/pgxn/neon/neon_utils.h 
b/pgxn/neon/neon_utils.h index 89683714f1..7480ac28cc 100644 --- a/pgxn/neon/neon_utils.h +++ b/pgxn/neon/neon_utils.h @@ -8,6 +8,8 @@ #endif bool HexDecodeString(uint8 *result, char *input, int nbytes); +uint16 pq_getmsgint16(StringInfo msg); +uint32 pq_getmsgint32(StringInfo msg); uint32 pq_getmsgint32_le(StringInfo msg); uint64 pq_getmsgint64_le(StringInfo msg); void pq_sendint32_le(StringInfo buf, uint32 i); diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 7472fd6afc..d7604e30d7 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -70,6 +70,7 @@ static bool SendAppendRequests(Safekeeper *sk); static bool RecvAppendResponses(Safekeeper *sk); static XLogRecPtr CalculateMinFlushLsn(WalProposer *wp); static XLogRecPtr GetAcknowledgedByQuorumWALPosition(WalProposer *wp); +static void PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version); static void HandleSafekeeperResponse(WalProposer *wp, Safekeeper *sk); static bool AsyncRead(Safekeeper *sk, char **buf, int *buf_size); static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg); @@ -81,6 +82,8 @@ static char *FormatSafekeeperState(Safekeeper *sk); static void AssertEventsOkForState(uint32 events, Safekeeper *sk); static char *FormatEvents(WalProposer *wp, uint32 events); static void UpdateDonorShmem(WalProposer *wp); +static char *MembershipConfigurationToString(MembershipConfiguration *mconf); +static void MembershipConfigurationFree(MembershipConfiguration *mconf); WalProposer * WalProposerCreate(WalProposerConfig *config, walproposer_api api) @@ -137,25 +140,21 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) } wp->quorum = wp->n_safekeepers / 2 + 1; + if (wp->config->proto_version != 2 && wp->config->proto_version != 3) + wp_log(FATAL, "unsupported safekeeper protocol version %d", wp->config->proto_version); + wp_log(LOG, "using safekeeper protocol version %d", wp->config->proto_version); 
+ /* Fill the greeting package */ - wp->greetRequest.tag = 'g'; - wp->greetRequest.protocolVersion = SK_PROTOCOL_VERSION; - wp->greetRequest.pgVersion = PG_VERSION_NUM; - wp->api.strong_random(wp, &wp->greetRequest.proposerId, sizeof(wp->greetRequest.proposerId)); - wp->greetRequest.systemId = wp->config->systemId; - if (!wp->config->neon_timeline) - wp_log(FATAL, "neon.timeline_id is not provided"); - if (*wp->config->neon_timeline != '\0' && - !HexDecodeString(wp->greetRequest.timeline_id, wp->config->neon_timeline, 16)) - wp_log(FATAL, "could not parse neon.timeline_id, %s", wp->config->neon_timeline); + wp->greetRequest.pam.tag = 'g'; if (!wp->config->neon_tenant) wp_log(FATAL, "neon.tenant_id is not provided"); - if (*wp->config->neon_tenant != '\0' && - !HexDecodeString(wp->greetRequest.tenant_id, wp->config->neon_tenant, 16)) - wp_log(FATAL, "could not parse neon.tenant_id, %s", wp->config->neon_tenant); - - wp->greetRequest.timeline = wp->config->pgTimeline; - wp->greetRequest.walSegSize = wp->config->wal_segment_size; + wp->greetRequest.tenant_id = wp->config->neon_tenant; + if (!wp->config->neon_timeline) + wp_log(FATAL, "neon.timeline_id is not provided"); + wp->greetRequest.timeline_id = wp->config->neon_timeline; + wp->greetRequest.pg_version = PG_VERSION_NUM; + wp->greetRequest.system_id = wp->config->systemId; + wp->greetRequest.wal_seg_size = wp->config->wal_segment_size; wp->api.init_event_set(wp); @@ -165,12 +164,14 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) void WalProposerFree(WalProposer *wp) { + MembershipConfigurationFree(&wp->mconf); for (int i = 0; i < wp->n_safekeepers; i++) { Safekeeper *sk = &wp->safekeeper[i]; Assert(sk->outbuf.data != NULL); pfree(sk->outbuf.data); + MembershipConfigurationFree(&sk->greetResponse.mconf); if (sk->voteResponse.termHistory.entries) pfree(sk->voteResponse.termHistory.entries); sk->voteResponse.termHistory.entries = NULL; @@ -308,6 +309,7 @@ ShutdownConnection(Safekeeper *sk) 
sk->state = SS_OFFLINE; sk->streamingAt = InvalidXLogRecPtr; + MembershipConfigurationFree(&sk->greetResponse.mconf); if (sk->voteResponse.termHistory.entries) pfree(sk->voteResponse.termHistory.entries); sk->voteResponse.termHistory.entries = NULL; @@ -598,11 +600,14 @@ static void SendStartWALPush(Safekeeper *sk) { WalProposer *wp = sk->wp; +#define CMD_LEN 512 + char cmd[CMD_LEN]; - if (!wp->api.conn_send_query(sk, "START_WAL_PUSH")) + snprintf(cmd, CMD_LEN, "START_WAL_PUSH (proto_version '%d')", wp->config->proto_version); + if (!wp->api.conn_send_query(sk, cmd)) { - wp_log(WARNING, "failed to send 'START_WAL_PUSH' query to safekeeper %s:%s: %s", - sk->host, sk->port, wp->api.conn_error_message(sk)); + wp_log(WARNING, "failed to send '%s' query to safekeeper %s:%s: %s", + cmd, sk->host, sk->port, wp->api.conn_error_message(sk)); ShutdownConnection(sk); return; } @@ -658,23 +663,33 @@ RecvStartWALPushResult(Safekeeper *sk) /* * Start handshake: first of all send information about the - * safekeeper. After sending, we wait on SS_HANDSHAKE_RECV for + * walproposer. After sending, we wait on SS_HANDSHAKE_RECV for * a response to finish the handshake. */ static void SendProposerGreeting(Safekeeper *sk) { + WalProposer *wp = sk->wp; + char *mconf_toml = MembershipConfigurationToString(&wp->greetRequest.mconf); + + wp_log(LOG, "sending ProposerGreeting to safekeeper %s:%s with mconf = %s", sk->host, sk->port, mconf_toml); + pfree(mconf_toml); + + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->greetRequest, + &sk->outbuf, wp->config->proto_version); + /* * On failure, logging & resetting the connection is handled. We just need * to handle the control flow. 
*/ - BlockingWrite(sk, &sk->wp->greetRequest, sizeof(sk->wp->greetRequest), SS_HANDSHAKE_RECV); + BlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_HANDSHAKE_RECV); } static void RecvAcceptorGreeting(Safekeeper *sk) { WalProposer *wp = sk->wp; + char *mconf_toml; /* * If our reading doesn't immediately succeed, any necessary error @@ -685,7 +700,10 @@ RecvAcceptorGreeting(Safekeeper *sk) if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->greetResponse)) return; - wp_log(LOG, "received AcceptorGreeting from safekeeper %s:%s, term=" INT64_FORMAT, sk->host, sk->port, sk->greetResponse.term); + mconf_toml = MembershipConfigurationToString(&sk->greetResponse.mconf); + wp_log(LOG, "received AcceptorGreeting from safekeeper %s:%s, node_id = %lu, mconf = %s, term=" UINT64_FORMAT, + sk->host, sk->port, sk->greetResponse.nodeId, mconf_toml, sk->greetResponse.term); + pfree(mconf_toml); /* Protocol is all good, move to voting. */ sk->state = SS_VOTING; @@ -707,12 +725,9 @@ RecvAcceptorGreeting(Safekeeper *sk) wp->propTerm++; wp_log(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, wp->quorum, wp->propTerm); - wp->voteRequest = (VoteRequest) - { - .tag = 'v', - .term = wp->propTerm - }; - memcpy(wp->voteRequest.proposerId.data, wp->greetRequest.proposerId.data, UUID_LEN); + wp->voteRequest.pam.tag = 'v'; + wp->voteRequest.generation = wp->mconf.generation; + wp->voteRequest.term = wp->propTerm; } } else if (sk->greetResponse.term > wp->propTerm) @@ -759,12 +774,14 @@ SendVoteRequest(Safekeeper *sk) { WalProposer *wp = sk->wp; - /* We have quorum for voting, send our vote request */ - wp_log(LOG, "requesting vote from %s:%s for term " UINT64_FORMAT, sk->host, sk->port, wp->voteRequest.term); - /* On failure, logging & resetting is handled */ - if (!BlockingWrite(sk, &wp->voteRequest, sizeof(wp->voteRequest), SS_WAIT_VERDICT)) - return; + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &wp->voteRequest, + &sk->outbuf, 
wp->config->proto_version); + /* We have quorum for voting, send our vote request */ + wp_log(LOG, "requesting vote from %s:%s for generation %u term " UINT64_FORMAT, sk->host, sk->port, + wp->voteRequest.generation, wp->voteRequest.term); + /* On failure, logging & resetting is handled */ + BlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_WAIT_VERDICT); /* If successful, wait for read-ready with SS_WAIT_VERDICT */ } @@ -778,11 +795,12 @@ RecvVoteResponse(Safekeeper *sk) return; wp_log(LOG, - "got VoteResponse from acceptor %s:%s, voteGiven=" UINT64_FORMAT ", epoch=" UINT64_FORMAT ", flushLsn=%X/%X, truncateLsn=%X/%X, timelineStartLsn=%X/%X", - sk->host, sk->port, sk->voteResponse.voteGiven, GetHighestTerm(&sk->voteResponse.termHistory), + "got VoteResponse from acceptor %s:%s, generation=%u, term=%lu, voteGiven=%u, last_log_term=" UINT64_FORMAT ", flushLsn=%X/%X, truncateLsn=%X/%X", + sk->host, sk->port, sk->voteResponse.generation, sk->voteResponse.term, + sk->voteResponse.voteGiven, + GetHighestTerm(&sk->voteResponse.termHistory), LSN_FORMAT_ARGS(sk->voteResponse.flushLsn), - LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn), - LSN_FORMAT_ARGS(sk->voteResponse.timelineStartLsn)); + LSN_FORMAT_ARGS(sk->voteResponse.truncateLsn)); /* * In case of acceptor rejecting our vote, bail out, but only if either it @@ -847,9 +865,9 @@ HandleElectedProposer(WalProposer *wp) * otherwise we must be sync-safekeepers and we have nothing to do then. * * Proceeding is not only pointless but harmful, because we'd give - * safekeepers term history starting with 0/0. These hacks will go away once - * we disable implicit timeline creation on safekeepers and create it with - * non zero LSN from the start. + * safekeepers term history starting with 0/0. These hacks will go away + * once we disable implicit timeline creation on safekeepers and create it + * with non zero LSN from the start. 
*/ if (wp->propEpochStartLsn == InvalidXLogRecPtr) { @@ -942,7 +960,6 @@ DetermineEpochStartLsn(WalProposer *wp) wp->propEpochStartLsn = InvalidXLogRecPtr; wp->donorEpoch = 0; wp->truncateLsn = InvalidXLogRecPtr; - wp->timelineStartLsn = InvalidXLogRecPtr; for (int i = 0; i < wp->n_safekeepers; i++) { @@ -959,20 +976,6 @@ DetermineEpochStartLsn(WalProposer *wp) wp->donor = i; } wp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn); - - if (wp->safekeeper[i].voteResponse.timelineStartLsn != InvalidXLogRecPtr) - { - /* timelineStartLsn should be the same everywhere or unknown */ - if (wp->timelineStartLsn != InvalidXLogRecPtr && - wp->timelineStartLsn != wp->safekeeper[i].voteResponse.timelineStartLsn) - { - wp_log(WARNING, - "inconsistent timelineStartLsn: current %X/%X, received %X/%X", - LSN_FORMAT_ARGS(wp->timelineStartLsn), - LSN_FORMAT_ARGS(wp->safekeeper[i].voteResponse.timelineStartLsn)); - } - wp->timelineStartLsn = wp->safekeeper[i].voteResponse.timelineStartLsn; - } } } @@ -995,22 +998,11 @@ DetermineEpochStartLsn(WalProposer *wp) if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers) { wp->propEpochStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn(wp); - if (wp->timelineStartLsn == InvalidXLogRecPtr) - { - wp->timelineStartLsn = wp->api.get_redo_start_lsn(wp); - } wp_log(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propEpochStartLsn)); } pg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propEpochStartLsn); - /* - * Safekeepers are setting truncateLsn after timelineStartLsn is known, so - * it should never be zero at this point, if we know timelineStartLsn. - * - * timelineStartLsn can be zero only on the first syncSafekeepers run. 
- */ - Assert((wp->truncateLsn != InvalidXLogRecPtr) || - (wp->config->syncSafekeepers && wp->truncateLsn == wp->timelineStartLsn)); + Assert(wp->truncateLsn != InvalidXLogRecPtr || wp->config->syncSafekeepers); /* * We will be generating WAL since propEpochStartLsn, so we should set @@ -1053,10 +1045,11 @@ DetermineEpochStartLsn(WalProposer *wp) if (SkipXLogPageHeader(wp, wp->propEpochStartLsn) != wp->api.get_redo_start_lsn(wp)) { /* - * However, allow to proceed if last_log_term on the node which gave - * the highest vote (i.e. point where we are going to start writing) - * actually had been won by me; plain restart of walproposer not - * intervened by concurrent compute which wrote WAL is ok. + * However, allow to proceed if last_log_term on the node which + * gave the highest vote (i.e. point where we are going to start + * writing) actually had been won by me; plain restart of + * walproposer not intervened by concurrent compute which wrote + * WAL is ok. * * This avoids compute crash after manual term_bump. 
*/ @@ -1126,14 +1119,8 @@ SendProposerElected(Safekeeper *sk) { /* safekeeper is empty or no common point, start from the beginning */ sk->startStreamingAt = wp->propTermHistory.entries[0].lsn; - wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, timelineStartLsn=%X/%X, termHistory.n_entries=%u", - sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), LSN_FORMAT_ARGS(wp->timelineStartLsn), wp->propTermHistory.n_entries); - - /* - * wp->timelineStartLsn == InvalidXLogRecPtr can be only when timeline - * is created manually (test_s3_wal_replay) - */ - Assert(sk->startStreamingAt == wp->timelineStartLsn || wp->timelineStartLsn == InvalidXLogRecPtr); + wp_log(LOG, "no common point with sk %s:%s, streaming since first term at %X/%X, termHistory.n_entries=%u", + sk->host, sk->port, LSN_FORMAT_ARGS(sk->startStreamingAt), wp->propTermHistory.n_entries); } else { @@ -1158,29 +1145,19 @@ SendProposerElected(Safekeeper *sk) Assert(sk->startStreamingAt <= wp->availableLsn); - msg.tag = 'e'; + msg.apm.tag = 'e'; + msg.generation = wp->mconf.generation; msg.term = wp->propTerm; msg.startStreamingAt = sk->startStreamingAt; msg.termHistory = &wp->propTermHistory; - msg.timelineStartLsn = wp->timelineStartLsn; lastCommonTerm = idx >= 0 ? 
wp->propTermHistory.entries[idx].term : 0; wp_log(LOG, - "sending elected msg to node " UINT64_FORMAT " term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s, timelineStartLsn=%X/%X", - sk->greetResponse.nodeId, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port, LSN_FORMAT_ARGS(msg.timelineStartLsn)); - - resetStringInfo(&sk->outbuf); - pq_sendint64_le(&sk->outbuf, msg.tag); - pq_sendint64_le(&sk->outbuf, msg.term); - pq_sendint64_le(&sk->outbuf, msg.startStreamingAt); - pq_sendint32_le(&sk->outbuf, msg.termHistory->n_entries); - for (int i = 0; i < msg.termHistory->n_entries; i++) - { - pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].term); - pq_sendint64_le(&sk->outbuf, msg.termHistory->entries[i].lsn); - } - pq_sendint64_le(&sk->outbuf, msg.timelineStartLsn); + "sending elected msg to node " UINT64_FORMAT " generation=%u term=" UINT64_FORMAT ", startStreamingAt=%X/%X (lastCommonTerm=" UINT64_FORMAT "), termHistory.n_entries=%u to %s:%s", + sk->greetResponse.nodeId, msg.generation, msg.term, LSN_FORMAT_ARGS(msg.startStreamingAt), + lastCommonTerm, msg.termHistory->n_entries, sk->host, sk->port); + PAMessageSerialize(wp, (ProposerAcceptorMessage *) &msg, &sk->outbuf, wp->config->proto_version); if (!AsyncWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_SEND_ELECTED_FLUSH)) return; @@ -1246,14 +1223,13 @@ static void PrepareAppendRequest(WalProposer *wp, AppendRequestHeader *req, XLogRecPtr beginLsn, XLogRecPtr endLsn) { Assert(endLsn >= beginLsn); - req->tag = 'a'; + req->apm.tag = 'a'; + req->generation = wp->mconf.generation; req->term = wp->propTerm; - req->epochStartLsn = wp->propEpochStartLsn; req->beginLsn = beginLsn; req->endLsn = endLsn; req->commitLsn = wp->commitLsn; req->truncateLsn = wp->truncateLsn; - req->proposerId = wp->greetRequest.proposerId; } /* @@ -1354,7 +1330,8 @@ SendAppendRequests(Safekeeper *sk) 
resetStringInfo(&sk->outbuf); /* write AppendRequest header */ - appendBinaryStringInfo(&sk->outbuf, (char *) req, sizeof(AppendRequestHeader)); + PAMessageSerialize(wp, (ProposerAcceptorMessage *) req, &sk->outbuf, wp->config->proto_version); + /* prepare for reading WAL into the outbuf */ enlargeStringInfo(&sk->outbuf, req->endLsn - req->beginLsn); sk->active_state = SS_ACTIVE_READ_WAL; } @@ -1367,14 +1344,17 @@ SendAppendRequests(Safekeeper *sk) req = &sk->appendRequest; req_len = req->endLsn - req->beginLsn; - /* We send zero sized AppenRequests as heartbeats; don't wal_read for these. */ + /* + * We send zero sized AppenRequests as heartbeats; don't wal_read + * for these. + */ if (req_len > 0) { switch (wp->api.wal_read(sk, - &sk->outbuf.data[sk->outbuf.len], - req->beginLsn, - req_len, - &errmsg)) + &sk->outbuf.data[sk->outbuf.len], + req->beginLsn, + req_len, + &errmsg)) { case NEON_WALREAD_SUCCESS: break; @@ -1382,7 +1362,7 @@ SendAppendRequests(Safekeeper *sk) return true; case NEON_WALREAD_ERROR: wp_log(WARNING, "WAL reading for node %s:%s failed: %s", - sk->host, sk->port, errmsg); + sk->host, sk->port, errmsg); ShutdownConnection(sk); return false; default: @@ -1470,11 +1450,11 @@ RecvAppendResponses(Safekeeper *sk) * Term has changed to higher one, probably another compute is * running. If this is the case we could PANIC as well because * likely it inserted some data and our basebackup is unsuitable - * anymore. However, we also bump term manually (term_bump endpoint) - * on safekeepers for migration purposes, in this case we do want - * compute to stay alive. So restart walproposer with FATAL instead - * of panicking; if basebackup is spoiled next election will notice - * this. + * anymore. However, we also bump term manually (term_bump + * endpoint) on safekeepers for migration purposes, in this case + * we do want compute to stay alive. 
So restart walproposer with + * FATAL instead of panicking; if basebackup is spoiled next + * election will notice this. */ wp_log(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejected our request, our term " INT64_FORMAT ", meaning another compute is running at the same time, and it conflicts with us", sk->host, sk->port, @@ -1509,7 +1489,7 @@ ParsePageserverFeedbackMessage(WalProposer *wp, StringInfo reply_message, Pagese for (i = 0; i < nkeys; i++) { - const char *key = pq_getmsgstring(reply_message); + const char *key = pq_getmsgrawstring(reply_message); unsigned int value_len = pq_getmsgint(reply_message, sizeof(int32)); if (strcmp(key, "current_timeline_size") == 0) @@ -1750,6 +1730,208 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk) } } +/* Serialize MembershipConfiguration into buf. */ +static void +MembershipConfigurationSerialize(MembershipConfiguration *mconf, StringInfo buf) +{ + uint32 i; + + pq_sendint32(buf, mconf->generation); + + pq_sendint32(buf, mconf->members.len); + for (i = 0; i < mconf->members.len; i++) + { + pq_sendint64(buf, mconf->members.m[i].node_id); + pq_send_ascii_string(buf, mconf->members.m[i].host); + pq_sendint16(buf, mconf->members.m[i].port); + } + + /* + * There is no special mark for absent new_members; zero members in + * invalid, so zero len means absent. 
+ */ + pq_sendint32(buf, mconf->new_members.len); + for (i = 0; i < mconf->new_members.len; i++) + { + pq_sendint64(buf, mconf->new_members.m[i].node_id); + pq_send_ascii_string(buf, mconf->new_members.m[i].host); + pq_sendint16(buf, mconf->new_members.m[i].port); + } +} + +/* Serialize proposer -> acceptor message into buf using specified version */ +static void +PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf, int proto_version) +{ + /* both version are supported currently until we fully migrate to 3 */ + Assert(proto_version == 3 || proto_version == 2); + + resetStringInfo(buf); + + if (proto_version == 3) + { + /* + * v2 sends structs for some messages as is, so commonly send tag only + * for v3 + */ + pq_sendint8(buf, msg->tag); + + switch (msg->tag) + { + case 'g': + { + ProposerGreeting *m = (ProposerGreeting *) msg; + + pq_send_ascii_string(buf, m->tenant_id); + pq_send_ascii_string(buf, m->timeline_id); + MembershipConfigurationSerialize(&m->mconf, buf); + pq_sendint32(buf, m->pg_version); + pq_sendint64(buf, m->system_id); + pq_sendint32(buf, m->wal_seg_size); + break; + } + case 'v': + { + VoteRequest *m = (VoteRequest *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + break; + + } + case 'e': + { + ProposerElected *m = (ProposerElected *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + pq_sendint64(buf, m->startStreamingAt); + pq_sendint32(buf, m->termHistory->n_entries); + for (uint32 i = 0; i < m->termHistory->n_entries; i++) + { + pq_sendint64(buf, m->termHistory->entries[i].term); + pq_sendint64(buf, m->termHistory->entries[i].lsn); + } + break; + } + case 'a': + { + /* + * Note: this serializes only AppendRequestHeader, caller + * is expected to append WAL data later. 
+ */ + AppendRequestHeader *m = (AppendRequestHeader *) msg; + + pq_sendint32(buf, m->generation); + pq_sendint64(buf, m->term); + pq_sendint64(buf, m->beginLsn); + pq_sendint64(buf, m->endLsn); + pq_sendint64(buf, m->commitLsn); + pq_sendint64(buf, m->truncateLsn); + break; + } + default: + wp_log(FATAL, "unexpected message type %c to serialize", msg->tag); + } + return; + } + + if (proto_version == 2) + { + switch (msg->tag) + { + case 'g': + { + /* v2 sent struct as is */ + ProposerGreeting *m = (ProposerGreeting *) msg; + ProposerGreetingV2 greetRequestV2; + + /* Fill also v2 struct. */ + greetRequestV2.tag = 'g'; + greetRequestV2.protocolVersion = proto_version; + greetRequestV2.pgVersion = m->pg_version; + + /* + * v3 removed this field because it's easier to pass as + * libq or START_WAL_PUSH options + */ + memset(&greetRequestV2.proposerId, 0, sizeof(greetRequestV2.proposerId)); + greetRequestV2.systemId = wp->config->systemId; + if (*m->timeline_id != '\0' && + !HexDecodeString(greetRequestV2.timeline_id, m->timeline_id, 16)) + wp_log(FATAL, "could not parse neon.timeline_id, %s", m->timeline_id); + if (*m->tenant_id != '\0' && + !HexDecodeString(greetRequestV2.tenant_id, m->tenant_id, 16)) + wp_log(FATAL, "could not parse neon.tenant_id, %s", m->tenant_id); + + greetRequestV2.timeline = wp->config->pgTimeline; + greetRequestV2.walSegSize = wp->config->wal_segment_size; + + pq_sendbytes(buf, (char *) &greetRequestV2, sizeof(greetRequestV2)); + break; + } + case 'v': + { + /* v2 sent struct as is */ + VoteRequest *m = (VoteRequest *) msg; + VoteRequestV2 voteRequestV2; + + voteRequestV2.tag = m->pam.tag; + voteRequestV2.term = m->term; + /* removed field */ + memset(&voteRequestV2.proposerId, 0, sizeof(voteRequestV2.proposerId)); + pq_sendbytes(buf, (char *) &voteRequestV2, sizeof(voteRequestV2)); + break; + } + case 'e': + { + ProposerElected *m = (ProposerElected *) msg; + + pq_sendint64_le(buf, m->apm.tag); + pq_sendint64_le(buf, m->term); + 
pq_sendint64_le(buf, m->startStreamingAt); + pq_sendint32_le(buf, m->termHistory->n_entries); + for (int i = 0; i < m->termHistory->n_entries; i++) + { + pq_sendint64_le(buf, m->termHistory->entries[i].term); + pq_sendint64_le(buf, m->termHistory->entries[i].lsn); + } + pq_sendint64_le(buf, 0); /* removed timeline_start_lsn */ + break; + } + case 'a': + + /* + * Note: this serializes only AppendRequestHeader, caller is + * expected to append WAL data later. + */ + { + /* v2 sent struct as is */ + AppendRequestHeader *m = (AppendRequestHeader *) msg; + AppendRequestHeaderV2 appendRequestHeaderV2; + + appendRequestHeaderV2.tag = m->apm.tag; + appendRequestHeaderV2.term = m->term; + appendRequestHeaderV2.epochStartLsn = 0; /* removed field */ + appendRequestHeaderV2.beginLsn = m->beginLsn; + appendRequestHeaderV2.endLsn = m->endLsn; + appendRequestHeaderV2.commitLsn = m->commitLsn; + appendRequestHeaderV2.truncateLsn = m->truncateLsn; + /* removed field */ + memset(&appendRequestHeaderV2.proposerId, 0, sizeof(appendRequestHeaderV2.proposerId)); + + pq_sendbytes(buf, (char *) &appendRequestHeaderV2, sizeof(appendRequestHeaderV2)); + break; + } + + default: + wp_log(FATAL, "unexpected message type %c to serialize", msg->tag); + } + return; + } + wp_log(FATAL, "unexpected proto_version %d", proto_version); +} + /* * Try to read CopyData message from i'th safekeeper, resetting connection on * failure. @@ -1779,6 +1961,37 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size) return false; } +/* Deserialize membership configuration from buf to mconf. 
*/ +static void +MembershipConfigurationDeserialize(MembershipConfiguration *mconf, StringInfo buf) +{ + uint32 i; + + mconf->generation = pq_getmsgint32(buf); + mconf->members.len = pq_getmsgint32(buf); + mconf->members.m = palloc0(sizeof(SafekeeperId) * mconf->members.len); + for (i = 0; i < mconf->members.len; i++) + { + const char *buf_host; + + mconf->members.m[i].node_id = pq_getmsgint64(buf); + buf_host = pq_getmsgrawstring(buf); + strlcpy(mconf->members.m[i].host, buf_host, sizeof(mconf->members.m[i].host)); + mconf->members.m[i].port = pq_getmsgint16(buf); + } + mconf->new_members.len = pq_getmsgint32(buf); + mconf->new_members.m = palloc0(sizeof(SafekeeperId) * mconf->new_members.len); + for (i = 0; i < mconf->new_members.len; i++) + { + const char *buf_host; + + mconf->new_members.m[i].node_id = pq_getmsgint64(buf); + buf_host = pq_getmsgrawstring(buf); + strlcpy(mconf->new_members.m[i].host, buf_host, sizeof(mconf->new_members.m[i].host)); + mconf->new_members.m[i].port = pq_getmsgint16(buf); + } +} + /* * Read next message with known type into provided struct, by reading a CopyData * block from the safekeeper's postgres connection, returning whether the read @@ -1787,6 +2000,8 @@ AsyncRead(Safekeeper *sk, char **buf, int *buf_size) * If the read needs more polling, we return 'false' and keep the state * unmodified, waiting until it becomes read-ready to try again. If it fully * failed, a warning is emitted and the connection is reset. + * + * Note: it pallocs if needed, i.e. for AcceptorGreeting and VoteResponse fields. 
*/ static bool AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg) @@ -1795,82 +2010,154 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg) char *buf; int buf_size; - uint64 tag; + uint8 tag; StringInfoData s; if (!(AsyncRead(sk, &buf, &buf_size))) return false; + sk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp); /* parse it */ s.data = buf; s.len = buf_size; + s.maxlen = buf_size; s.cursor = 0; - tag = pq_getmsgint64_le(&s); - if (tag != anymsg->tag) + if (wp->config->proto_version == 3) { - wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, - sk->port, FormatSafekeeperState(sk)); - ResetConnection(sk); - return false; - } - sk->latestMsgReceivedAt = wp->api.get_current_timestamp(wp); - switch (tag) - { - case 'g': - { - AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; - - msg->term = pq_getmsgint64_le(&s); - msg->nodeId = pq_getmsgint64_le(&s); - pq_getmsgend(&s); - return true; - } - - case 'v': - { - VoteResponse *msg = (VoteResponse *) anymsg; - - msg->term = pq_getmsgint64_le(&s); - msg->voteGiven = pq_getmsgint64_le(&s); - msg->flushLsn = pq_getmsgint64_le(&s); - msg->truncateLsn = pq_getmsgint64_le(&s); - msg->termHistory.n_entries = pq_getmsgint32_le(&s); - msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); - for (int i = 0; i < msg->termHistory.n_entries; i++) + tag = pq_getmsgbyte(&s); + if (tag != anymsg->tag) + { + wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, + sk->port, FormatSafekeeperState(sk)); + ResetConnection(sk); + return false; + } + switch (tag) + { + case 'g': { - msg->termHistory.entries[i].term = pq_getmsgint64_le(&s); - msg->termHistory.entries[i].lsn = pq_getmsgint64_le(&s); + AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; + + msg->nodeId = pq_getmsgint64(&s); + MembershipConfigurationDeserialize(&msg->mconf, &s); + msg->term = pq_getmsgint64(&s); + 
pq_getmsgend(&s); + return true; } - msg->timelineStartLsn = pq_getmsgint64_le(&s); - pq_getmsgend(&s); - return true; - } + case 'v': + { + VoteResponse *msg = (VoteResponse *) anymsg; - case 'a': - { - AppendResponse *msg = (AppendResponse *) anymsg; + msg->generation = pq_getmsgint32(&s); + msg->term = pq_getmsgint64(&s); + msg->voteGiven = pq_getmsgbyte(&s); + msg->flushLsn = pq_getmsgint64(&s); + msg->truncateLsn = pq_getmsgint64(&s); + msg->termHistory.n_entries = pq_getmsgint32(&s); + msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); + for (uint32 i = 0; i < msg->termHistory.n_entries; i++) + { + msg->termHistory.entries[i].term = pq_getmsgint64(&s); + msg->termHistory.entries[i].lsn = pq_getmsgint64(&s); + } + pq_getmsgend(&s); + return true; + } + case 'a': + { + AppendResponse *msg = (AppendResponse *) anymsg; - msg->term = pq_getmsgint64_le(&s); - msg->flushLsn = pq_getmsgint64_le(&s); - msg->commitLsn = pq_getmsgint64_le(&s); - msg->hs.ts = pq_getmsgint64_le(&s); - msg->hs.xmin.value = pq_getmsgint64_le(&s); - msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s); - if (s.len > s.cursor) - ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); - else - msg->ps_feedback.present = false; - pq_getmsgend(&s); - return true; - } - - default: - { - Assert(false); - return false; - } + msg->generation = pq_getmsgint32(&s); + msg->term = pq_getmsgint64(&s); + msg->flushLsn = pq_getmsgint64(&s); + msg->commitLsn = pq_getmsgint64(&s); + msg->hs.ts = pq_getmsgint64(&s); + msg->hs.xmin.value = pq_getmsgint64(&s); + msg->hs.catalog_xmin.value = pq_getmsgint64(&s); + if (s.len > s.cursor) + ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); + else + msg->ps_feedback.present = false; + pq_getmsgend(&s); + return true; + } + default: + { + wp_log(FATAL, "unexpected message tag %c to read", (char) tag); + return false; + } + } } + else if (wp->config->proto_version == 2) + { + tag = pq_getmsgint64_le(&s); + if (tag != 
anymsg->tag) + { + wp_log(WARNING, "unexpected message tag %c from node %s:%s in state %s", (char) tag, sk->host, + sk->port, FormatSafekeeperState(sk)); + ResetConnection(sk); + return false; + } + switch (tag) + { + case 'g': + { + AcceptorGreeting *msg = (AcceptorGreeting *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->nodeId = pq_getmsgint64_le(&s); + pq_getmsgend(&s); + return true; + } + + case 'v': + { + VoteResponse *msg = (VoteResponse *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->voteGiven = pq_getmsgint64_le(&s); + msg->flushLsn = pq_getmsgint64_le(&s); + msg->truncateLsn = pq_getmsgint64_le(&s); + msg->termHistory.n_entries = pq_getmsgint32_le(&s); + msg->termHistory.entries = palloc(sizeof(TermSwitchEntry) * msg->termHistory.n_entries); + for (int i = 0; i < msg->termHistory.n_entries; i++) + { + msg->termHistory.entries[i].term = pq_getmsgint64_le(&s); + msg->termHistory.entries[i].lsn = pq_getmsgint64_le(&s); + } + pq_getmsgint64_le(&s); /* timelineStartLsn */ + pq_getmsgend(&s); + return true; + } + + case 'a': + { + AppendResponse *msg = (AppendResponse *) anymsg; + + msg->term = pq_getmsgint64_le(&s); + msg->flushLsn = pq_getmsgint64_le(&s); + msg->commitLsn = pq_getmsgint64_le(&s); + msg->hs.ts = pq_getmsgint64_le(&s); + msg->hs.xmin.value = pq_getmsgint64_le(&s); + msg->hs.catalog_xmin.value = pq_getmsgint64_le(&s); + if (s.len > s.cursor) + ParsePageserverFeedbackMessage(wp, &s, &msg->ps_feedback); + else + msg->ps_feedback.present = false; + pq_getmsgend(&s); + return true; + } + + default: + { + wp_log(FATAL, "unexpected message tag %c to read", (char) tag); + return false; + } + } + } + wp_log(FATAL, "unsupported proto_version %d", wp->config->proto_version); + return false; /* keep the compiler quiet */ } /* @@ -2246,3 +2533,45 @@ FormatEvents(WalProposer *wp, uint32 events) return (char *) &return_str; } + +/* Dump mconf as toml for observability / debugging. Result is palloc'ed. 
*/ +static char * +MembershipConfigurationToString(MembershipConfiguration *mconf) +{ + StringInfoData s; + uint32 i; + + initStringInfo(&s); + appendStringInfo(&s, "{gen = %u", mconf->generation); + appendStringInfoString(&s, ", members = ["); + for (i = 0; i < mconf->members.len; i++) + { + if (i > 0) + appendStringInfoString(&s, ", "); + appendStringInfo(&s, "{node_id = %lu", mconf->members.m[i].node_id); + appendStringInfo(&s, ", host = %s", mconf->members.m[i].host); + appendStringInfo(&s, ", port = %u }", mconf->members.m[i].port); + } + appendStringInfo(&s, "], new_members = ["); + for (i = 0; i < mconf->new_members.len; i++) + { + if (i > 0) + appendStringInfoString(&s, ", "); + appendStringInfo(&s, "{node_id = %lu", mconf->new_members.m[i].node_id); + appendStringInfo(&s, ", host = %s", mconf->new_members.m[i].host); + appendStringInfo(&s, ", port = %u }", mconf->new_members.m[i].port); + } + appendStringInfoString(&s, "]}"); + return s.data; +} + +static void +MembershipConfigurationFree(MembershipConfiguration *mconf) +{ + if (mconf->members.m) + pfree(mconf->members.m); + mconf->members.m = NULL; + if (mconf->new_members.m) + pfree(mconf->new_members.m); + mconf->new_members.m = NULL; +} diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h index d8c44f8182..eee55f924f 100644 --- a/pgxn/neon/walproposer.h +++ b/pgxn/neon/walproposer.h @@ -12,9 +12,6 @@ #include "neon_walreader.h" #include "pagestore_client.h" -#define SK_MAGIC 0xCafeCeefu -#define SK_PROTOCOL_VERSION 2 - #define MAX_SAFEKEEPERS 32 #define MAX_SEND_SIZE (XLOG_BLCKSZ * 16) /* max size of a single* WAL * message */ @@ -143,12 +140,71 @@ typedef uint64 term_t; /* neon storage node id */ typedef uint64 NNodeId; +/* + * Number uniquely identifying safekeeper membership configuration. + * This and following structs pair ones in membership.rs. 
+ */ +typedef uint32 Generation; + +typedef struct SafekeeperId +{ + NNodeId node_id; + char host[MAXCONNINFO]; + uint16 port; +} SafekeeperId; + +/* Set of safekeepers. */ +typedef struct MemberSet +{ + uint32 len; /* number of members */ + SafekeeperId *m; /* ids themselves */ +} MemberSet; + +/* Timeline safekeeper membership configuration. */ +typedef struct MembershipConfiguration +{ + Generation generation; + MemberSet members; + /* Has 0 n_members in non joint conf. */ + MemberSet new_members; +} MembershipConfiguration; + /* * Proposer <-> Acceptor messaging. */ +typedef struct ProposerAcceptorMessage +{ + uint8 tag; +} ProposerAcceptorMessage; + /* Initial Proposer -> Acceptor message */ typedef struct ProposerGreeting +{ + ProposerAcceptorMessage pam; /* message tag */ + + /* + * tenant/timeline ids as C strings with standard hex notation for ease of + * printing. In principle they are not strictly needed as ttid is also + * passed as libpq options. + */ + char *tenant_id; + char *timeline_id; + /* Full conf is carried to allow safekeeper switch */ + MembershipConfiguration mconf; + + /* + * pg_version and wal_seg_size are used for timeline creation until we + * fully migrate to doing externally. systemId is only used as a sanity + * cross check. + */ + uint32 pg_version; /* in PG_VERSION_NUM format */ + uint64 system_id; /* Postgres system identifier. */ + uint32 wal_seg_size; +} ProposerGreeting; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct ProposerGreetingV2 { uint64 tag; /* message tag */ uint32 protocolVersion; /* proposer-safekeeper protocol version */ @@ -159,32 +215,42 @@ typedef struct ProposerGreeting uint8 tenant_id[16]; TimeLineID timeline; uint32 walSegSize; -} ProposerGreeting; +} ProposerGreetingV2; typedef struct AcceptorProposerMessage { - uint64 tag; + uint8 tag; } AcceptorProposerMessage; /* - * Acceptor -> Proposer initial response: the highest term acceptor voted for. 
+ * Acceptor -> Proposer initial response: the highest term acceptor voted for, + * its node id and configuration. */ typedef struct AcceptorGreeting { AcceptorProposerMessage apm; - term_t term; NNodeId nodeId; + MembershipConfiguration mconf; + term_t term; } AcceptorGreeting; /* * Proposer -> Acceptor vote request. */ typedef struct VoteRequest +{ + ProposerAcceptorMessage pam; /* message tag */ + Generation generation; /* membership conf generation */ + term_t term; +} VoteRequest; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct VoteRequestV2 { uint64 tag; term_t term; pg_uuid_t proposerId; /* for monitoring/debugging */ -} VoteRequest; +} VoteRequestV2; /* Element of term switching chain. */ typedef struct TermSwitchEntry @@ -203,8 +269,15 @@ typedef struct TermHistory typedef struct VoteResponse { AcceptorProposerMessage apm; + + /* + * Membership conf generation. It's not strictly required because on + * mismatch safekeeper is expected to ERROR the connection, but let's + * sanity check it. + */ + Generation generation; term_t term; - uint64 voteGiven; + uint8 voteGiven; /* * Safekeeper flush_lsn (end of WAL) + history of term switches allow @@ -214,7 +287,6 @@ typedef struct VoteResponse XLogRecPtr truncateLsn; /* minimal LSN which may be needed for* * recovery of some safekeeper */ TermHistory termHistory; - XLogRecPtr timelineStartLsn; /* timeline globally starts at this LSN */ } VoteResponse; /* @@ -223,20 +295,37 @@ typedef struct VoteResponse */ typedef struct ProposerElected { - uint64 tag; + AcceptorProposerMessage apm; + Generation generation; /* membership conf generation */ term_t term; /* proposer will send since this point */ XLogRecPtr startStreamingAt; /* history of term switches up to this proposer */ TermHistory *termHistory; - /* timeline globally starts at this LSN */ - XLogRecPtr timelineStartLsn; } ProposerElected; /* * Header of request with WAL message sent from proposer to safekeeper. 
*/ typedef struct AppendRequestHeader +{ + AcceptorProposerMessage apm; + Generation generation; /* membership conf generation */ + term_t term; /* term of the proposer */ + XLogRecPtr beginLsn; /* start position of message in WAL */ + XLogRecPtr endLsn; /* end position of message in WAL */ + XLogRecPtr commitLsn; /* LSN committed by quorum of safekeepers */ + + /* + * minimal LSN which may be needed for recovery of some safekeeper (end + * lsn + 1 of last chunk streamed to everyone) + */ + XLogRecPtr truncateLsn; + /* in the AppendRequest message, WAL data follows */ +} AppendRequestHeader; + +/* protocol v2 variant, kept while wp supports it */ +typedef struct AppendRequestHeaderV2 { uint64 tag; term_t term; /* term of the proposer */ @@ -256,7 +345,8 @@ typedef struct AppendRequestHeader */ XLogRecPtr truncateLsn; pg_uuid_t proposerId; /* for monitoring/debugging */ -} AppendRequestHeader; + /* in the AppendRequest message, WAL data follows */ +} AppendRequestHeaderV2; /* * Hot standby feedback received from replica @@ -309,6 +399,13 @@ typedef struct AppendResponse { AcceptorProposerMessage apm; + /* + * Membership conf generation. It's not strictly required because on + * mismatch safekeeper is expected to ERROR the connection, but let's + * sanity check it. + */ + Generation generation; + /* * Current term of the safekeeper; if it is higher than proposer's, the * compute is out of date. @@ -644,6 +741,8 @@ typedef struct WalProposerConfig /* Will be passed to safekeepers in greet request. 
*/ TimeLineID pgTimeline; + int proto_version; + #ifdef WALPROPOSER_LIB void *callback_data; #endif @@ -656,11 +755,14 @@ typedef struct WalProposerConfig typedef struct WalProposer { WalProposerConfig *config; - int n_safekeepers; + /* Current walproposer membership configuration */ + MembershipConfiguration mconf; /* (n_safekeepers / 2) + 1 */ int quorum; + /* Number of occupied slots in safekeepers[] */ + int n_safekeepers; Safekeeper safekeeper[MAX_SAFEKEEPERS]; /* WAL has been generated up to this point */ @@ -670,6 +772,7 @@ typedef struct WalProposer XLogRecPtr commitLsn; ProposerGreeting greetRequest; + ProposerGreetingV2 greetRequestV2; /* Vote request for safekeeper */ VoteRequest voteRequest; diff --git a/pgxn/neon/walproposer_compat.c b/pgxn/neon/walproposer_compat.c index 35d984c52e..a986160224 100644 --- a/pgxn/neon/walproposer_compat.c +++ b/pgxn/neon/walproposer_compat.c @@ -117,14 +117,13 @@ pq_getmsgbytes(StringInfo msg, int datalen) } /* -------------------------------- - * pq_getmsgstring - get a null-terminated text string (with conversion) + * pq_getmsgrawstring - get a null-terminated text string - NO conversion * - * May return a pointer directly into the message buffer, or a pointer - * to a palloc'd conversion result. + * Returns a pointer directly into the message buffer. 
* -------------------------------- */ const char * -pq_getmsgstring(StringInfo msg) +pq_getmsgrawstring(StringInfo msg) { char *str; int slen; @@ -155,6 +154,45 @@ pq_getmsgend(StringInfo msg) ExceptionalCondition("invalid msg format", __FILE__, __LINE__); } +/* -------------------------------- + * pq_sendbytes - append raw data to a StringInfo buffer + * -------------------------------- + */ +void +pq_sendbytes(StringInfo buf, const void *data, int datalen) +{ + /* use variant that maintains a trailing null-byte, out of caution */ + appendBinaryStringInfo(buf, data, datalen); +} + +/* -------------------------------- + * pq_send_ascii_string - append a null-terminated text string (without conversion) + * + * This function intentionally bypasses encoding conversion, instead just + * silently replacing any non-7-bit-ASCII characters with question marks. + * It is used only when we are having trouble sending an error message to + * the client with normal localization and encoding conversion. The caller + * should already have taken measures to ensure the string is just ASCII; + * the extra work here is just to make certain we don't send a badly encoded + * string to the client (which might or might not be robust about that). + * + * NB: passed text string must be null-terminated, and so is the data + * sent to the frontend. + * -------------------------------- + */ +void +pq_send_ascii_string(StringInfo buf, const char *str) +{ + while (*str) + { + char ch = *str++; + + if (IS_HIGHBIT_SET(ch)) + ch = '?'; + appendStringInfoCharMacro(buf, ch); + } + appendStringInfoChar(buf, '\0'); +} /* * Produce a C-string representation of a TimestampTz. 
diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c index 86444084ff..b21184de57 100644 --- a/pgxn/neon/walproposer_pg.c +++ b/pgxn/neon/walproposer_pg.c @@ -59,9 +59,11 @@ #define WAL_PROPOSER_SLOT_NAME "wal_proposer_slot" +/* GUCs */ char *wal_acceptors_list = ""; int wal_acceptor_reconnect_timeout = 1000; int wal_acceptor_connection_timeout = 10000; +int safekeeper_proto_version = 2; /* Set to true in the walproposer bgw. */ static bool am_walproposer; @@ -126,6 +128,7 @@ init_walprop_config(bool syncSafekeepers) else walprop_config.systemId = 0; walprop_config.pgTimeline = walprop_pg_get_timeline_id(); + walprop_config.proto_version = safekeeper_proto_version; } /* @@ -219,25 +222,37 @@ nwp_register_gucs(void) PGC_SIGHUP, GUC_UNIT_MS, NULL, NULL, NULL); + + DefineCustomIntVariable( + "neon.safekeeper_proto_version", + "Version of compute <-> safekeeper protocol.", + "Used while migrating from 2 to 3.", + &safekeeper_proto_version, + 2, 0, INT_MAX, + PGC_POSTMASTER, + 0, + NULL, NULL, NULL); } static int split_safekeepers_list(char *safekeepers_list, char *safekeepers[]) { - int n_safekeepers = 0; - char *curr_sk = safekeepers_list; + int n_safekeepers = 0; + char *curr_sk = safekeepers_list; for (char *coma = safekeepers_list; coma != NULL && *coma != '\0'; curr_sk = coma) { - if (++n_safekeepers >= MAX_SAFEKEEPERS) { + if (++n_safekeepers >= MAX_SAFEKEEPERS) + { wpg_log(FATAL, "too many safekeepers"); } coma = strchr(coma, ','); - safekeepers[n_safekeepers-1] = curr_sk; + safekeepers[n_safekeepers - 1] = curr_sk; - if (coma != NULL) { + if (coma != NULL) + { *coma++ = '\0'; } } @@ -252,10 +267,10 @@ split_safekeepers_list(char *safekeepers_list, char *safekeepers[]) static bool safekeepers_cmp(char *old, char *new) { - char *safekeepers_old[MAX_SAFEKEEPERS]; - char *safekeepers_new[MAX_SAFEKEEPERS]; - int len_old = 0; - int len_new = 0; + char *safekeepers_old[MAX_SAFEKEEPERS]; + char *safekeepers_new[MAX_SAFEKEEPERS]; + int len_old = 0; + int 
len_new = 0; len_old = split_safekeepers_list(old, safekeepers_old); len_new = split_safekeepers_list(new, safekeepers_new); @@ -292,7 +307,8 @@ assign_neon_safekeepers(const char *newval, void *extra) if (!am_walproposer) return; - if (!newval) { + if (!newval) + { /* should never happen */ wpg_log(FATAL, "neon.safekeepers is empty"); } @@ -301,11 +317,11 @@ assign_neon_safekeepers(const char *newval, void *extra) newval_copy = pstrdup(newval); oldval = pstrdup(wal_acceptors_list); - /* + /* * TODO: restarting through FATAL is stupid and introduces 1s delay before - * next bgw start. We should refactor walproposer to allow graceful exit and - * thus remove this delay. - * XXX: If you change anything here, sync with test_safekeepers_reconfigure_reorder. + * next bgw start. We should refactor walproposer to allow graceful exit + * and thus remove this delay. XXX: If you change anything here, sync with + * test_safekeepers_reconfigure_reorder. */ if (!safekeepers_cmp(oldval, newval_copy)) { @@ -454,7 +470,8 @@ backpressure_throttling_impl(void) memcpy(new_status, old_status, len); snprintf(new_status + len, 64, "backpressure throttling: lag %lu", lag); set_ps_display(new_status); - new_status[len] = '\0'; /* truncate off " backpressure ..." to later reset the ps */ + new_status[len] = '\0'; /* truncate off " backpressure ..." 
to later + * reset the ps */ elog(DEBUG2, "backpressure throttling: lag %lu", lag); start = GetCurrentTimestamp(); @@ -621,7 +638,7 @@ walprop_pg_start_streaming(WalProposer *wp, XLogRecPtr startpos) wpg_log(LOG, "WAL proposer starts streaming at %X/%X", LSN_FORMAT_ARGS(startpos)); cmd.slotname = WAL_PROPOSER_SLOT_NAME; - cmd.timeline = wp->greetRequest.timeline; + cmd.timeline = wp->config->pgTimeline; cmd.startpoint = startpos; StartProposerReplication(wp, &cmd); } @@ -1963,10 +1980,11 @@ walprop_pg_process_safekeeper_feedback(WalProposer *wp, Safekeeper *sk) FullTransactionId xmin = hsFeedback.xmin; FullTransactionId catalog_xmin = hsFeedback.catalog_xmin; FullTransactionId next_xid = ReadNextFullTransactionId(); + /* - * Page server is updating nextXid in checkpoint each 1024 transactions, - * so feedback xmin can be actually larger then nextXid and - * function TransactionIdInRecentPast return false in this case, + * Page server is updating nextXid in checkpoint each 1024 + * transactions, so feedback xmin can be actually larger then nextXid + * and function TransactionIdInRecentPast return false in this case, * preventing update of slot's xmin. 
*/ if (FullTransactionIdPrecedes(next_xid, xmin)) diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 19c6662e74..1c0ae66f01 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -13,6 +13,7 @@ use safekeeper::safekeeper::{ AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, }; use safekeeper::test_utils::Env; +use safekeeper_api::membership::SafekeeperGeneration as Generation; use tokio::io::AsyncWriteExt as _; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; @@ -88,13 +89,12 @@ fn bench_process_msg(c: &mut Criterion) { let (lsn, record) = walgen.next().expect("endless WAL"); ProposerAcceptorMessage::AppendRequest(AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }) @@ -160,13 +160,12 @@ fn bench_wal_acceptor(c: &mut Criterion) { .take(n) .map(|(lsn, record)| AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }) @@ -262,13 +261,12 @@ fn bench_wal_acceptor_throughput(c: &mut Criterion) { runtime.block_on(async { let reqgen = walgen.take(count).map(|(lsn, record)| AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(0), begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: if commit { lsn } else { Lsn(0) }, // commit previous record truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }); diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs index 19e17c4a75..8d7c1109ad 100644 --- 
a/safekeeper/src/json_ctrl.rs +++ b/safekeeper/src/json_ctrl.rs @@ -8,7 +8,7 @@ use anyhow::Context; use postgres_backend::QueryError; -use safekeeper_api::membership::Configuration; +use safekeeper_api::membership::{Configuration, INVALID_GENERATION}; use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; @@ -133,10 +133,10 @@ async fn send_proposer_elected( let history = TermHistory(history_entries); let proposer_elected_request = ProposerAcceptorMessage::Elected(ProposerElected { + generation: INVALID_GENERATION, term, start_streaming_at: lsn, term_history: history, - timeline_start_lsn: lsn, }); tli.process_msg(&proposer_elected_request).await?; @@ -170,13 +170,12 @@ pub async fn append_logical_message( let append_request = ProposerAcceptorMessage::AppendRequest(AppendRequest { h: AppendRequestHeader { + generation: INVALID_GENERATION, term: msg.term, - term_start_lsn: begin_lsn, begin_lsn, end_lsn, commit_lsn, truncate_lsn: msg.truncate_lsn, - proposer_uuid: [0u8; 16], }, wal_data, }); diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index cb42f6f414..a94e6930e1 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -281,7 +281,7 @@ impl SafekeeperPostgresHandler { tokio::select! 
{ // todo: add read|write .context to these errors r = network_reader.run(msg_tx, msg_rx, reply_tx, timeline, next_msg) => r, - r = network_write(pgb, reply_rx, pageserver_feedback_rx) => r, + r = network_write(pgb, reply_rx, pageserver_feedback_rx, proto_version) => r, _ = timeline_cancel.cancelled() => { return Err(CopyStreamHandlerEnd::Cancelled); } @@ -342,8 +342,8 @@ impl NetworkReader<'_, IO> { let tli = match next_msg { ProposerAcceptorMessage::Greeting(ref greeting) => { info!( - "start handshake with walproposer {} sysid {} timeline {}", - self.peer_addr, greeting.system_id, greeting.tli, + "start handshake with walproposer {} sysid {}", + self.peer_addr, greeting.system_id, ); let server_info = ServerInfo { pg_version: greeting.pg_version, @@ -459,6 +459,7 @@ async fn network_write( pgb_writer: &mut PostgresBackend, mut reply_rx: Receiver, mut pageserver_feedback_rx: tokio::sync::broadcast::Receiver, + proto_version: u32, ) -> Result<(), CopyStreamHandlerEnd> { let mut buf = BytesMut::with_capacity(128); @@ -496,7 +497,7 @@ async fn network_write( }; buf.clear(); - msg.serialize(&mut buf)?; + msg.serialize(&mut buf, proto_version)?; pgb_writer.write_message(&BeMessage::CopyData(&buf)).await?; } } diff --git a/safekeeper/src/recovery.rs b/safekeeper/src/recovery.rs index 35394eb6ed..3e9080ebbe 100644 --- a/safekeeper/src/recovery.rs +++ b/safekeeper/src/recovery.rs @@ -7,6 +7,7 @@ use std::{fmt, pin::pin}; use anyhow::{bail, Context}; use futures::StreamExt; use postgres_protocol::message::backend::ReplicationMessage; +use safekeeper_api::membership::INVALID_GENERATION; use safekeeper_api::models::{PeerInfo, TimelineStatus}; use safekeeper_api::Term; use tokio::sync::mpsc::{channel, Receiver, Sender}; @@ -267,7 +268,10 @@ async fn recover( ); // Now understand our term history. 
- let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: donor.term }); + let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { + generation: INVALID_GENERATION, + term: donor.term, + }); let vote_response = match tli .process_msg(&vote_request) .await @@ -302,10 +306,10 @@ async fn recover( // truncate WAL locally let pe = ProposerAcceptorMessage::Elected(ProposerElected { + generation: INVALID_GENERATION, term: donor.term, start_streaming_at: last_common_point.lsn, term_history: donor_th, - timeline_start_lsn: Lsn::INVALID, }); // Successful ProposerElected handling always returns None. If term changed, // we'll find out that during the streaming. Note: it is expected to get @@ -437,13 +441,12 @@ async fn network_io( match msg { ReplicationMessage::XLogData(xlog_data) => { let ar_hdr = AppendRequestHeader { + generation: INVALID_GENERATION, term: donor.term, - term_start_lsn: Lsn::INVALID, // unused begin_lsn: Lsn(xlog_data.wal_start()), end_lsn: Lsn(xlog_data.wal_start()) + xlog_data.data().len() as u64, commit_lsn: Lsn::INVALID, // do not attempt to advance, peer communication anyway does it truncate_lsn: Lsn::INVALID, // do not attempt to advance - proposer_uuid: [0; 16], }; let ar = AppendRequest { h: ar_hdr, diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f816f8459a..f429cafed2 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -5,6 +5,11 @@ use byteorder::{LittleEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use postgres_ffi::{TimeLineID, MAX_SEND_SIZE}; +use safekeeper_api::membership; +use safekeeper_api::membership::MemberSet; +use safekeeper_api::membership::SafekeeperGeneration as Generation; +use safekeeper_api::membership::SafekeeperId; +use safekeeper_api::membership::INVALID_GENERATION; use safekeeper_api::models::HotStandbyFeedback; use safekeeper_api::Term; use serde::{Deserialize, Serialize}; @@ -12,6 +17,7 @@ use std::cmp::max; 
use std::cmp::min; use std::fmt; use std::io::Read; +use std::str::FromStr; use storage_broker::proto::SafekeeperTimelineInfo; use tracing::*; @@ -29,7 +35,8 @@ use utils::{ lsn::Lsn, }; -pub const SK_PROTOCOL_VERSION: u32 = 2; +pub const SK_PROTO_VERSION_2: u32 = 2; +pub const SK_PROTO_VERSION_3: u32 = 3; pub const UNKNOWN_SERVER_VERSION: u32 = 0; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] @@ -56,8 +63,28 @@ impl TermHistory { TermHistory(Vec::new()) } - // Parse TermHistory as n_entries followed by TermLsn pairs + // Parse TermHistory as n_entries followed by TermLsn pairs in network order. pub fn from_bytes(bytes: &mut Bytes) -> Result { + let n_entries = bytes + .get_u32_f() + .with_context(|| "TermHistory misses len")?; + let mut res = Vec::with_capacity(n_entries as usize); + for i in 0..n_entries { + let term = bytes + .get_u64_f() + .with_context(|| format!("TermHistory pos {} misses term", i))?; + let lsn = bytes + .get_u64_f() + .with_context(|| format!("TermHistory pos {} misses lsn", i))? + .into(); + res.push(TermLsn { term, lsn }) + } + Ok(TermHistory(res)) + } + + // Parse TermHistory as n_entries followed by TermLsn pairs in LE order. + // TODO remove once v2 protocol is fully dropped. + pub fn from_bytes_le(bytes: &mut Bytes) -> Result { if bytes.remaining() < 4 { bail!("TermHistory misses len"); } @@ -197,6 +224,18 @@ impl AcceptorState { /// Initial Proposer -> Acceptor message #[derive(Debug, Deserialize)] pub struct ProposerGreeting { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub mconf: membership::Configuration, + /// Postgres server version + pub pg_version: u32, + pub system_id: SystemId, + pub wal_seg_size: u32, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. 
+#[derive(Debug, Deserialize)] +pub struct ProposerGreetingV2 { /// proposer-acceptor protocol version pub protocol_version: u32, /// Postgres server version @@ -213,27 +252,35 @@ pub struct ProposerGreeting { /// (acceptor voted for). #[derive(Debug, Serialize)] pub struct AcceptorGreeting { - term: u64, node_id: NodeId, + mconf: membership::Configuration, + term: u64, } /// Vote request sent from proposer to safekeepers -#[derive(Debug, Deserialize)] +#[derive(Debug)] pub struct VoteRequest { + pub generation: Generation, + pub term: Term, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. +#[derive(Debug, Deserialize)] +pub struct VoteRequestV2 { pub term: Term, } /// Vote itself, sent from safekeeper to proposer #[derive(Debug, Serialize)] pub struct VoteResponse { + generation: Generation, // membership conf generation pub term: Term, // safekeeper's current term; if it is higher than proposer's, the compute is out of date. - vote_given: u64, // fixme u64 due to padding + vote_given: bool, // Safekeeper flush_lsn (end of WAL) + history of term switches allow // proposer to choose the most advanced one. pub flush_lsn: Lsn, truncate_lsn: Lsn, pub term_history: TermHistory, - timeline_start_lsn: Lsn, } /* @@ -242,10 +289,10 @@ pub struct VoteResponse { */ #[derive(Debug)] pub struct ProposerElected { + pub generation: Generation, // membership conf generation pub term: Term, pub start_streaming_at: Lsn, pub term_history: TermHistory, - pub timeline_start_lsn: Lsn, } /// Request with WAL message sent from proposer to safekeeper. Along the way it @@ -257,6 +304,22 @@ pub struct AppendRequest { } #[derive(Debug, Clone, Deserialize)] pub struct AppendRequestHeader { + pub generation: Generation, // membership conf generation + // safekeeper's current term; if it is higher than proposer's, the compute is out of date. 
+ pub term: Term, + /// start position of message in WAL + pub begin_lsn: Lsn, + /// end position of message in WAL + pub end_lsn: Lsn, + /// LSN committed by quorum of safekeepers + pub commit_lsn: Lsn, + /// minimal LSN which may be needed by proposer to perform recovery of some safekeeper + pub truncate_lsn: Lsn, +} + +/// V2 of the message; exists as a struct because we (de)serialized it as is. +#[derive(Debug, Clone, Deserialize)] +pub struct AppendRequestHeaderV2 { // safekeeper's current term; if it is higher than proposer's, the compute is out of date. pub term: Term, // TODO: remove this field from the protocol, it in unused -- LSN of term @@ -277,6 +340,9 @@ pub struct AppendRequestHeader { /// Report safekeeper state to proposer #[derive(Debug, Serialize, Clone)] pub struct AppendResponse { + // Membership conf generation. Not strictly required because on mismatch + // connection is reset, but let's sanity check it. + generation: Generation, // Current term of the safekeeper; if it is higher than proposer's, the // compute is out of date. pub term: Term, @@ -293,8 +359,9 @@ pub struct AppendResponse { } impl AppendResponse { - fn term_only(term: Term) -> AppendResponse { + fn term_only(generation: Generation, term: Term) -> AppendResponse { AppendResponse { + generation, term, flush_lsn: Lsn(0), commit_lsn: Lsn(0), @@ -315,72 +382,322 @@ pub enum ProposerAcceptorMessage { FlushWAL, } -impl ProposerAcceptorMessage { - /// Parse proposer message. - pub fn parse(msg_bytes: Bytes, proto_version: u32) -> Result { - if proto_version != SK_PROTOCOL_VERSION { - bail!( - "incompatible protocol version {}, expected {}", - proto_version, - SK_PROTOCOL_VERSION - ); +/// Augment Bytes with fallible get_uN where N is number of bytes methods. +/// All reads are in network (big endian) order. 
+trait BytesF { + fn get_u8_f(&mut self) -> Result; + fn get_u16_f(&mut self) -> Result; + fn get_u32_f(&mut self) -> Result; + fn get_u64_f(&mut self) -> Result; +} + +impl BytesF for Bytes { + fn get_u8_f(&mut self) -> Result { + if self.is_empty() { + bail!("no bytes left, expected 1"); } - // xxx using Reader is inefficient but easy to work with bincode - let mut stream = msg_bytes.reader(); - // u64 is here to avoid padding; it will be removed once we stop packing C structs into the wire as is - let tag = stream.read_u64::()? as u8 as char; - match tag { - 'g' => { - let msg = ProposerGreeting::des_from(&mut stream)?; - Ok(ProposerAcceptorMessage::Greeting(msg)) - } - 'v' => { - let msg = VoteRequest::des_from(&mut stream)?; - Ok(ProposerAcceptorMessage::VoteRequest(msg)) - } - 'e' => { - let mut msg_bytes = stream.into_inner(); - if msg_bytes.remaining() < 16 { - bail!("ProposerElected message is not complete"); - } - let term = msg_bytes.get_u64_le(); - let start_streaming_at = msg_bytes.get_u64_le().into(); - let term_history = TermHistory::from_bytes(&mut msg_bytes)?; - if msg_bytes.remaining() < 8 { - bail!("ProposerElected message is not complete"); - } - let timeline_start_lsn = msg_bytes.get_u64_le().into(); - let msg = ProposerElected { - term, - start_streaming_at, - timeline_start_lsn, - term_history, + Ok(self.get_u8()) + } + fn get_u16_f(&mut self) -> Result { + if self.remaining() < 2 { + bail!("no bytes left, expected 2"); + } + Ok(self.get_u16()) + } + fn get_u32_f(&mut self) -> Result { + if self.remaining() < 4 { + bail!("only {} bytes left, expected 4", self.remaining()); + } + Ok(self.get_u32()) + } + fn get_u64_f(&mut self) -> Result { + if self.remaining() < 8 { + bail!("only {} bytes left, expected 8", self.remaining()); + } + Ok(self.get_u64()) + } +} + +impl ProposerAcceptorMessage { + /// Read cstring from Bytes. 
+ fn get_cstr(buf: &mut Bytes) -> Result { + let pos = buf + .iter() + .position(|x| *x == 0) + .ok_or_else(|| anyhow::anyhow!("missing cstring terminator"))?; + let result = buf.split_to(pos); + buf.advance(1); // drop the null terminator + match std::str::from_utf8(&result) { + Ok(s) => Ok(s.to_string()), + Err(e) => bail!("invalid utf8 in cstring: {}", e), + } + } + + /// Read membership::Configuration from Bytes. + fn get_mconf(buf: &mut Bytes) -> Result { + let generation = Generation::new(buf.get_u32_f().with_context(|| "reading generation")?); + let members_len = buf.get_u32_f().with_context(|| "reading members_len")?; + // Main member set must have at least someone in valid configuration. + // Empty conf is allowed until we fully migrate. + if generation != INVALID_GENERATION && members_len == 0 { + bail!("empty members_len"); + } + let mut members = MemberSet::empty(); + for i in 0..members_len { + let id = buf + .get_u64_f() + .with_context(|| format!("reading member {} node_id", i))?; + let host = Self::get_cstr(buf).with_context(|| format!("reading member {} host", i))?; + let pg_port = buf + .get_u16_f() + .with_context(|| format!("reading member {} port", i))?; + let sk = SafekeeperId { + id: NodeId(id), + host, + pg_port, + }; + members.add(sk)?; + } + let new_members_len = buf.get_u32_f().with_context(|| "reading new_members_len")?; + // Non joint conf. 
+ if new_members_len == 0 { + Ok(membership::Configuration { + generation, + members, + new_members: None, + }) + } else { + let mut new_members = MemberSet::empty(); + for i in 0..new_members_len { + let id = buf + .get_u64_f() + .with_context(|| format!("reading new member {} node_id", i))?; + let host = Self::get_cstr(buf) + .with_context(|| format!("reading new member {} host", i))?; + let pg_port = buf + .get_u16_f() + .with_context(|| format!("reading new member {} port", i))?; + let sk = SafekeeperId { + id: NodeId(id), + host, + pg_port, }; - Ok(ProposerAcceptorMessage::Elected(msg)) + new_members.add(sk)?; } - 'a' => { - // read header followed by wal data - let hdr = AppendRequestHeader::des_from(&mut stream)?; - let rec_size = hdr - .end_lsn - .checked_sub(hdr.begin_lsn) - .context("begin_lsn > end_lsn in AppendRequest")? - .0 as usize; - if rec_size > MAX_SEND_SIZE { - bail!( - "AppendRequest is longer than MAX_SEND_SIZE ({})", - MAX_SEND_SIZE - ); + Ok(membership::Configuration { + generation, + members, + new_members: Some(new_members), + }) + } + } + + /// Parse proposer message. + pub fn parse(mut msg_bytes: Bytes, proto_version: u32) -> Result { + if proto_version == SK_PROTO_VERSION_3 { + if msg_bytes.is_empty() { + bail!("ProposerAcceptorMessage is not complete: missing tag"); + } + let tag = msg_bytes.get_u8_f().with_context(|| { + "ProposerAcceptorMessage is not complete: missing tag".to_string() + })? 
as char; + match tag { + 'g' => { + let tenant_id_str = + Self::get_cstr(&mut msg_bytes).with_context(|| "reading tenant_id")?; + let tenant_id = TenantId::from_str(&tenant_id_str)?; + let timeline_id_str = + Self::get_cstr(&mut msg_bytes).with_context(|| "reading timeline_id")?; + let timeline_id = TimelineId::from_str(&timeline_id_str)?; + let mconf = Self::get_mconf(&mut msg_bytes)?; + let pg_version = msg_bytes + .get_u32_f() + .with_context(|| "reading pg_version")?; + let system_id = msg_bytes.get_u64_f().with_context(|| "reading system_id")?; + let wal_seg_size = msg_bytes + .get_u32_f() + .with_context(|| "reading wal_seg_size")?; + let g = ProposerGreeting { + tenant_id, + timeline_id, + mconf, + pg_version, + system_id, + wal_seg_size, + }; + Ok(ProposerAcceptorMessage::Greeting(g)) } + 'v' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let v = VoteRequest { generation, term }; + Ok(ProposerAcceptorMessage::VoteRequest(v)) + } + 'e' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let start_streaming_at: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading start_streaming_at")? + .into(); + let term_history = TermHistory::from_bytes(&mut msg_bytes)?; + let msg = ProposerElected { + generation, + term, + start_streaming_at, + term_history, + }; + Ok(ProposerAcceptorMessage::Elected(msg)) + } + 'a' => { + let generation = Generation::new( + msg_bytes + .get_u32_f() + .with_context(|| "reading generation")?, + ); + let term = msg_bytes.get_u64_f().with_context(|| "reading term")?; + let begin_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading begin_lsn")? + .into(); + let end_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading end_lsn")? 
+ .into(); + let commit_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading commit_lsn")? + .into(); + let truncate_lsn: Lsn = msg_bytes + .get_u64_f() + .with_context(|| "reading truncate_lsn")? + .into(); + let hdr = AppendRequestHeader { + generation, + term, + begin_lsn, + end_lsn, + commit_lsn, + truncate_lsn, + }; + let rec_size = hdr + .end_lsn + .checked_sub(hdr.begin_lsn) + .context("begin_lsn > end_lsn in AppendRequest")? + .0 as usize; + if rec_size > MAX_SEND_SIZE { + bail!( + "AppendRequest is longer than MAX_SEND_SIZE ({})", + MAX_SEND_SIZE + ); + } + if msg_bytes.remaining() < rec_size { + bail!( + "reading WAL: only {} bytes left, wanted {}", + msg_bytes.remaining(), + rec_size + ); + } + let wal_data = msg_bytes.copy_to_bytes(rec_size); + let msg = AppendRequest { h: hdr, wal_data }; - let mut wal_data_vec: Vec = vec![0; rec_size]; - stream.read_exact(&mut wal_data_vec)?; - let wal_data = Bytes::from(wal_data_vec); - let msg = AppendRequest { h: hdr, wal_data }; - - Ok(ProposerAcceptorMessage::AppendRequest(msg)) + Ok(ProposerAcceptorMessage::AppendRequest(msg)) + } + _ => bail!("unknown proposer-acceptor message tag: {}", tag), } - _ => bail!("unknown proposer-acceptor message tag: {}", tag), + } else if proto_version == SK_PROTO_VERSION_2 { + // xxx using Reader is inefficient but easy to work with bincode + let mut stream = msg_bytes.reader(); + // u64 is here to avoid padding; it will be removed once we stop packing C structs into the wire as is + let tag = stream.read_u64::()? 
as u8 as char; + match tag { + 'g' => { + let msgv2 = ProposerGreetingV2::des_from(&mut stream)?; + let g = ProposerGreeting { + tenant_id: msgv2.tenant_id, + timeline_id: msgv2.timeline_id, + mconf: membership::Configuration { + generation: INVALID_GENERATION, + members: MemberSet::empty(), + new_members: None, + }, + pg_version: msgv2.pg_version, + system_id: msgv2.system_id, + wal_seg_size: msgv2.wal_seg_size, + }; + Ok(ProposerAcceptorMessage::Greeting(g)) + } + 'v' => { + let msg = VoteRequestV2::des_from(&mut stream)?; + let v = VoteRequest { + generation: INVALID_GENERATION, + term: msg.term, + }; + Ok(ProposerAcceptorMessage::VoteRequest(v)) + } + 'e' => { + let mut msg_bytes = stream.into_inner(); + if msg_bytes.remaining() < 16 { + bail!("ProposerElected message is not complete"); + } + let term = msg_bytes.get_u64_le(); + let start_streaming_at = msg_bytes.get_u64_le().into(); + let term_history = TermHistory::from_bytes_le(&mut msg_bytes)?; + if msg_bytes.remaining() < 8 { + bail!("ProposerElected message is not complete"); + } + let _timeline_start_lsn = msg_bytes.get_u64_le(); + let msg = ProposerElected { + generation: INVALID_GENERATION, + term, + start_streaming_at, + term_history, + }; + Ok(ProposerAcceptorMessage::Elected(msg)) + } + 'a' => { + // read header followed by wal data + let hdrv2 = AppendRequestHeaderV2::des_from(&mut stream)?; + let hdr = AppendRequestHeader { + generation: INVALID_GENERATION, + term: hdrv2.term, + begin_lsn: hdrv2.begin_lsn, + end_lsn: hdrv2.end_lsn, + commit_lsn: hdrv2.commit_lsn, + truncate_lsn: hdrv2.truncate_lsn, + }; + let rec_size = hdr + .end_lsn + .checked_sub(hdr.begin_lsn) + .context("begin_lsn > end_lsn in AppendRequest")? 
+ .0 as usize; + if rec_size > MAX_SEND_SIZE { + bail!( + "AppendRequest is longer than MAX_SEND_SIZE ({})", + MAX_SEND_SIZE + ); + } + + let mut wal_data_vec: Vec = vec![0; rec_size]; + stream.read_exact(&mut wal_data_vec)?; + let wal_data = Bytes::from(wal_data_vec); + + let msg = AppendRequest { h: hdr, wal_data }; + + Ok(ProposerAcceptorMessage::AppendRequest(msg)) + } + _ => bail!("unknown proposer-acceptor message tag: {}", tag), + } + } else { + bail!("unsupported protocol version {}", proto_version); } } @@ -394,36 +711,21 @@ impl ProposerAcceptorMessage { // We explicitly list all fields, to draw attention here when new fields are added. let mut size = BASE_SIZE; size += match self { - Self::Greeting(ProposerGreeting { - protocol_version: _, - pg_version: _, - proposer_id: _, - system_id: _, - timeline_id: _, - tenant_id: _, - tli: _, - wal_seg_size: _, - }) => 0, + Self::Greeting(_) => 0, - Self::VoteRequest(VoteRequest { term: _ }) => 0, + Self::VoteRequest(_) => 0, - Self::Elected(ProposerElected { - term: _, - start_streaming_at: _, - term_history: _, - timeline_start_lsn: _, - }) => 0, + Self::Elected(_) => 0, Self::AppendRequest(AppendRequest { h: AppendRequestHeader { + generation: _, term: _, - term_start_lsn: _, begin_lsn: _, end_lsn: _, commit_lsn: _, truncate_lsn: _, - proposer_uuid: _, }, wal_data, }) => wal_data.len(), @@ -431,13 +733,12 @@ impl ProposerAcceptorMessage { Self::NoFlushAppendRequest(AppendRequest { h: AppendRequestHeader { + generation: _, term: _, - term_start_lsn: _, begin_lsn: _, end_lsn: _, commit_lsn: _, truncate_lsn: _, - proposer_uuid: _, }, wal_data, }) => wal_data.len(), @@ -458,45 +759,118 @@ pub enum AcceptorProposerMessage { } impl AcceptorProposerMessage { - /// Serialize acceptor -> proposer message. 
- pub fn serialize(&self, buf: &mut BytesMut) -> Result<()> { - match self { - AcceptorProposerMessage::Greeting(msg) => { - buf.put_u64_le('g' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.node_id.0); - } - AcceptorProposerMessage::VoteResponse(msg) => { - buf.put_u64_le('v' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.vote_given); - buf.put_u64_le(msg.flush_lsn.into()); - buf.put_u64_le(msg.truncate_lsn.into()); - buf.put_u32_le(msg.term_history.0.len() as u32); - for e in &msg.term_history.0 { - buf.put_u64_le(e.term); - buf.put_u64_le(e.lsn.into()); - } - buf.put_u64_le(msg.timeline_start_lsn.into()); - } - AcceptorProposerMessage::AppendResponse(msg) => { - buf.put_u64_le('a' as u64); - buf.put_u64_le(msg.term); - buf.put_u64_le(msg.flush_lsn.into()); - buf.put_u64_le(msg.commit_lsn.into()); - buf.put_i64_le(msg.hs_feedback.ts); - buf.put_u64_le(msg.hs_feedback.xmin); - buf.put_u64_le(msg.hs_feedback.catalog_xmin); + fn put_cstr(buf: &mut BytesMut, s: &str) { + buf.put_slice(s.as_bytes()); + buf.put_u8(0); // null terminator + } - // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback - // if it is not present. - if let Some(ref msg) = msg.pageserver_feedback { - msg.serialize(buf); - } - } + /// Serialize membership::Configuration into buf. + fn serialize_mconf(buf: &mut BytesMut, mconf: &membership::Configuration) { + buf.put_u32(mconf.generation.into_inner()); + buf.put_u32(mconf.members.m.len() as u32); + for sk in &mconf.members.m { + buf.put_u64(sk.id.0); + Self::put_cstr(buf, &sk.host); + buf.put_u16(sk.pg_port); } + if let Some(ref new_members) = mconf.new_members { + buf.put_u32(new_members.m.len() as u32); + for sk in &new_members.m { + buf.put_u64(sk.id.0); + Self::put_cstr(buf, &sk.host); + buf.put_u16(sk.pg_port); + } + } else { + buf.put_u32(0); + } + } - Ok(()) + /// Serialize acceptor -> proposer message. 
+ pub fn serialize(&self, buf: &mut BytesMut, proto_version: u32) -> Result<()> { + if proto_version == SK_PROTO_VERSION_3 { + match self { + AcceptorProposerMessage::Greeting(msg) => { + buf.put_u8(b'g'); + buf.put_u64(msg.node_id.0); + Self::serialize_mconf(buf, &msg.mconf); + buf.put_u64(msg.term) + } + AcceptorProposerMessage::VoteResponse(msg) => { + buf.put_u8(b'v'); + buf.put_u32(msg.generation.into_inner()); + buf.put_u64(msg.term); + buf.put_u8(msg.vote_given as u8); + buf.put_u64(msg.flush_lsn.into()); + buf.put_u64(msg.truncate_lsn.into()); + buf.put_u32(msg.term_history.0.len() as u32); + for e in &msg.term_history.0 { + buf.put_u64(e.term); + buf.put_u64(e.lsn.into()); + } + } + AcceptorProposerMessage::AppendResponse(msg) => { + buf.put_u8(b'a'); + buf.put_u32(msg.generation.into_inner()); + buf.put_u64(msg.term); + buf.put_u64(msg.flush_lsn.into()); + buf.put_u64(msg.commit_lsn.into()); + buf.put_i64(msg.hs_feedback.ts); + buf.put_u64(msg.hs_feedback.xmin); + buf.put_u64(msg.hs_feedback.catalog_xmin); + + // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback + // if it is not present. 
+ if let Some(ref msg) = msg.pageserver_feedback { + msg.serialize(buf); + } + } + } + Ok(()) + // TODO remove 3 after converting all msgs + } else if proto_version == SK_PROTO_VERSION_2 { + match self { + AcceptorProposerMessage::Greeting(msg) => { + buf.put_u64_le('g' as u64); + // v2 didn't have mconf and fields were reordered + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.node_id.0); + } + AcceptorProposerMessage::VoteResponse(msg) => { + // v2 didn't have generation, had u64 vote_given and timeline_start_lsn + buf.put_u64_le('v' as u64); + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.vote_given as u64); + buf.put_u64_le(msg.flush_lsn.into()); + buf.put_u64_le(msg.truncate_lsn.into()); + buf.put_u32_le(msg.term_history.0.len() as u32); + for e in &msg.term_history.0 { + buf.put_u64_le(e.term); + buf.put_u64_le(e.lsn.into()); + } + // removed timeline_start_lsn + buf.put_u64_le(0); + } + AcceptorProposerMessage::AppendResponse(msg) => { + // v2 didn't have generation + buf.put_u64_le('a' as u64); + buf.put_u64_le(msg.term); + buf.put_u64_le(msg.flush_lsn.into()); + buf.put_u64_le(msg.commit_lsn.into()); + buf.put_i64_le(msg.hs_feedback.ts); + buf.put_u64_le(msg.hs_feedback.xmin); + buf.put_u64_le(msg.hs_feedback.catalog_xmin); + + // AsyncReadMessage in walproposer.c will not try to decode pageserver_feedback + // if it is not present. + if let Some(ref msg) = msg.pageserver_feedback { + msg.serialize(buf); + } + } + } + Ok(()) + } else { + bail!("unsupported protocol version {}", proto_version); + } } } @@ -593,14 +967,6 @@ where &mut self, msg: &ProposerGreeting, ) -> Result> { - // Check protocol compatibility - if msg.protocol_version != SK_PROTOCOL_VERSION { - bail!( - "incompatible protocol version {}, expected {}", - msg.protocol_version, - SK_PROTOCOL_VERSION - ); - } /* Postgres major version mismatch is treated as fatal error * because safekeepers parse WAL headers and the format * may change between versions. 
@@ -655,15 +1021,16 @@ where self.state.finish_change(&state).await?; } - info!( - "processed greeting from walproposer {}, sending term {:?}", - msg.proposer_id.map(|b| format!("{:X}", b)).join(""), - self.state.acceptor_state.term - ); - Ok(Some(AcceptorProposerMessage::Greeting(AcceptorGreeting { - term: self.state.acceptor_state.term, + let apg = AcceptorGreeting { node_id: self.node_id, - }))) + mconf: self.state.mconf.clone(), + term: self.state.acceptor_state.term, + }; + info!( + "processed greeting {:?} from walproposer, sending {:?}", + msg, apg + ); + Ok(Some(AcceptorProposerMessage::Greeting(apg))) } /// Give vote for the given term, if we haven't done that previously. @@ -684,12 +1051,12 @@ where self.wal_store.flush_wal().await?; // initialize with refusal let mut resp = VoteResponse { + generation: self.state.mconf.generation, term: self.state.acceptor_state.term, - vote_given: false as u64, + vote_given: false, flush_lsn: self.flush_lsn(), truncate_lsn: self.state.inmem.peer_horizon_lsn, term_history: self.get_term_history(), - timeline_start_lsn: self.state.timeline_start_lsn, }; if self.state.acceptor_state.term < msg.term { let mut state = self.state.start_change(); @@ -698,15 +1065,16 @@ where self.state.finish_change(&state).await?; resp.term = self.state.acceptor_state.term; - resp.vote_given = true as u64; + resp.vote_given = true; } - info!("processed VoteRequest for term {}: {:?}", msg.term, &resp); + info!("processed {:?}: sending {:?}", msg, &resp); Ok(Some(AcceptorProposerMessage::VoteResponse(resp))) } /// Form AppendResponse from current state. fn append_response(&self) -> AppendResponse { let ar = AppendResponse { + generation: self.state.mconf.generation, term: self.state.acceptor_state.term, flush_lsn: self.flush_lsn(), commit_lsn: self.state.commit_lsn, @@ -805,18 +1173,22 @@ where // Here we learn initial LSN for the first time, set fields // interested in that. 
- if state.timeline_start_lsn == Lsn(0) { - // Remember point where WAL begins globally. - state.timeline_start_lsn = msg.timeline_start_lsn; - info!( - "setting timeline_start_lsn to {:?}", - state.timeline_start_lsn - ); + if let Some(start_lsn) = msg.term_history.0.first() { + if state.timeline_start_lsn == Lsn(0) { + // Remember point where WAL begins globally. In the future it + // will be intialized immediately on timeline creation. + state.timeline_start_lsn = start_lsn.lsn; + info!( + "setting timeline_start_lsn to {:?}", + state.timeline_start_lsn + ); + } } + if state.peer_horizon_lsn == Lsn(0) { // Update peer_horizon_lsn as soon as we know where timeline starts. // It means that peer_horizon_lsn cannot be zero after we know timeline_start_lsn. - state.peer_horizon_lsn = msg.timeline_start_lsn; + state.peer_horizon_lsn = state.timeline_start_lsn; } if state.local_start_lsn == Lsn(0) { state.local_start_lsn = msg.start_streaming_at; @@ -896,7 +1268,10 @@ where // If our term is higher, immediately refuse the message. if self.state.acceptor_state.term > msg.h.term { - let resp = AppendResponse::term_only(self.state.acceptor_state.term); + let resp = AppendResponse::term_only( + self.state.mconf.generation, + self.state.acceptor_state.term, + ); return Ok(Some(AcceptorProposerMessage::AppendResponse(resp))); } @@ -924,10 +1299,8 @@ where ); } - // Now we know that we are in the same term as the proposer, - // processing the message. - - self.state.inmem.proposer_uuid = msg.h.proposer_uuid; + // Now we know that we are in the same term as the proposer, process the + // message. 
// do the job if !msg.wal_data.is_empty() { @@ -1097,10 +1470,13 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); // check voting for 1 is ok - let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { term: 1 }); + let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { + generation: Generation::new(0), + term: 1, + }); let mut vote_resp = sk.process_msg(&vote_request).await; match vote_resp.unwrap() { - Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given != 0), + Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given), r => panic!("unexpected response: {:?}", r), } @@ -1115,7 +1491,7 @@ mod tests { // and ensure voting second time for 1 is not ok vote_resp = sk.process_msg(&vote_request).await; match vote_resp.unwrap() { - Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(resp.vote_given == 0), + Some(AcceptorProposerMessage::VoteResponse(resp)) => assert!(!resp.vote_given), r => panic!("unexpected response: {:?}", r), } } @@ -1130,13 +1506,12 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); let mut ar_hdr = AppendRequestHeader { + generation: Generation::new(0), term: 2, - term_start_lsn: Lsn(3), begin_lsn: Lsn(1), end_lsn: Lsn(2), commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }; let mut append_request = AppendRequest { h: ar_hdr.clone(), @@ -1144,6 +1519,7 @@ mod tests { }; let pem = ProposerElected { + generation: Generation::new(0), term: 2, start_streaming_at: Lsn(1), term_history: TermHistory(vec![ @@ -1156,7 +1532,6 @@ mod tests { lsn: Lsn(3), }, ]), - timeline_start_lsn: Lsn(1), }; sk.process_msg(&ProposerAcceptorMessage::Elected(pem)) .await @@ -1191,26 +1566,25 @@ mod tests { let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); let pem = ProposerElected { + generation: Generation::new(0), term: 1, 
start_streaming_at: Lsn(1), term_history: TermHistory(vec![TermLsn { term: 1, lsn: Lsn(1), }]), - timeline_start_lsn: Lsn(1), }; sk.process_msg(&ProposerAcceptorMessage::Elected(pem)) .await .unwrap(); let ar_hdr = AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: Lsn(3), begin_lsn: Lsn(1), end_lsn: Lsn(2), commit_lsn: Lsn(0), truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }; let append_request = AppendRequest { h: ar_hdr.clone(), diff --git a/safekeeper/src/test_utils.rs b/safekeeper/src/test_utils.rs index 79ceddd366..32af4537d3 100644 --- a/safekeeper/src/test_utils.rs +++ b/safekeeper/src/test_utils.rs @@ -14,6 +14,7 @@ use crate::wal_backup::remote_timeline_path; use crate::{control_file, receive_wal, wal_storage, SafeKeeperConf}; use camino_tempfile::Utf8TempDir; use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; +use safekeeper_api::membership::SafekeeperGeneration as Generation; use tokio::fs::create_dir_all; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; @@ -73,10 +74,10 @@ impl Env { // Emulate an initial election. 
safekeeper .process_msg(&ProposerAcceptorMessage::Elected(ProposerElected { + generation: Generation::new(0), term: 1, start_streaming_at: start_lsn, term_history: TermHistory(vec![(1, start_lsn).into()]), - timeline_start_lsn: start_lsn, })) .await?; @@ -146,13 +147,12 @@ impl Env { let req = AppendRequest { h: AppendRequestHeader { + generation: Generation::new(0), term: 1, - term_start_lsn: start_lsn, begin_lsn: lsn, end_lsn: lsn + record.len() as u64, commit_lsn: lsn, truncate_lsn: Lsn(0), - proposer_uuid: [0; 16], }, wal_data: record, }; diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 0023a4d22a..b9dfabe0d7 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -15,9 +15,7 @@ use desim::{ }; use http::Uri; use safekeeper::{ - safekeeper::{ - ProposerAcceptorMessage, SafeKeeper, SK_PROTOCOL_VERSION, UNKNOWN_SERVER_VERSION, - }, + safekeeper::{ProposerAcceptorMessage, SafeKeeper, SK_PROTO_VERSION_3, UNKNOWN_SERVER_VERSION}, state::{TimelinePersistentState, TimelineState}, timeline::TimelineError, wal_storage::Storage, @@ -287,7 +285,7 @@ impl ConnState { bail!("finished processing START_REPLICATION") } - let msg = ProposerAcceptorMessage::parse(copy_data, SK_PROTOCOL_VERSION)?; + let msg = ProposerAcceptorMessage::parse(copy_data, SK_PROTO_VERSION_3)?; debug!("got msg: {:?}", msg); self.process(msg, global) } else { @@ -403,7 +401,7 @@ impl ConnState { // TODO: if this is AppendResponse, fill in proper hot standby feedback and disk consistent lsn let mut buf = BytesMut::with_capacity(128); - reply.serialize(&mut buf)?; + reply.serialize(&mut buf, SK_PROTO_VERSION_3)?; self.tcp.send(AnyMessage::Bytes(buf.into())); } diff --git a/test_runner/regress/test_normal_work.py b/test_runner/regress/test_normal_work.py index ae2d171058..c8458b963e 100644 --- a/test_runner/regress/test_normal_work.py +++ b/test_runner/regress/test_normal_work.py @@ 
-6,9 +6,14 @@ from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder from fixtures.pageserver.http import PageserverHttpClient -def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): +def check_tenant( + env: NeonEnv, pageserver_http: PageserverHttpClient, safekeeper_proto_version: int +): tenant_id, timeline_id = env.create_tenant() - endpoint = env.endpoints.create_start("main", tenant_id=tenant_id) + config_lines = [ + f"neon.safekeeper_proto_version = {safekeeper_proto_version}", + ] + endpoint = env.endpoints.create_start("main", tenant_id=tenant_id, config_lines=config_lines) # we rely upon autocommit after each statement res_1 = endpoint.safe_psql_many( queries=[ @@ -33,7 +38,14 @@ def check_tenant(env: NeonEnv, pageserver_http: PageserverHttpClient): @pytest.mark.parametrize("num_timelines,num_safekeepers", [(3, 1)]) -def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_safekeepers: int): +# Test both proto versions until we fully migrate. 
+@pytest.mark.parametrize("safekeeper_proto_version", [2, 3]) +def test_normal_work( + neon_env_builder: NeonEnvBuilder, + num_timelines: int, + num_safekeepers: int, + safekeeper_proto_version: int, +): """ Basic test: * create new tenant with a timeline @@ -52,4 +64,4 @@ def test_normal_work(neon_env_builder: NeonEnvBuilder, num_timelines: int, num_s pageserver_http = env.pageserver.http_client() for _ in range(num_timelines): - check_tenant(env, pageserver_http) + check_tenant(env, pageserver_http, safekeeper_proto_version) diff --git a/test_runner/regress/test_wal_acceptor_async.py b/test_runner/regress/test_wal_acceptor_async.py index 936c774657..56539a0a08 100644 --- a/test_runner/regress/test_wal_acceptor_async.py +++ b/test_runner/regress/test_wal_acceptor_async.py @@ -539,13 +539,16 @@ def test_recovery_uncommitted(neon_env_builder: NeonEnvBuilder): asyncio.run(run_recovery_uncommitted(env)) -async def run_wal_truncation(env: NeonEnv): +async def run_wal_truncation(env: NeonEnv, safekeeper_proto_version: int): tenant_id = env.initial_tenant timeline_id = env.initial_timeline (sk1, sk2, sk3) = env.safekeepers - ep = env.endpoints.create_start("main") + config_lines = [ + f"neon.safekeeper_proto_version = {safekeeper_proto_version}", + ] + ep = env.endpoints.create_start("main", config_lines=config_lines) ep.safe_psql("create table t (key int, value text)") ep.safe_psql("insert into t select generate_series(1, 100), 'payload'") @@ -572,6 +575,7 @@ async def run_wal_truncation(env: NeonEnv): sk2.start() ep = env.endpoints.create_start( "main", + config_lines=config_lines, ) ep.safe_psql("insert into t select generate_series(1, 200), 'payload'") @@ -590,11 +594,13 @@ async def run_wal_truncation(env: NeonEnv): # Simple deterministic test creating tail of WAL on safekeeper which is # truncated when majority without this sk elects walproposer starting earlier. 
-def test_wal_truncation(neon_env_builder: NeonEnvBuilder): +# Test both proto versions until we fully migrate. +@pytest.mark.parametrize("safekeeper_proto_version", [2, 3]) +def test_wal_truncation(neon_env_builder: NeonEnvBuilder, safekeeper_proto_version: int): neon_env_builder.num_safekeepers = 3 env = neon_env_builder.init_start() - asyncio.run(run_wal_truncation(env)) + asyncio.run(run_wal_truncation(env, safekeeper_proto_version)) async def run_segment_init_failure(env: NeonEnv): From 8f82c661d4d39c29b6b2cf0d616e6abc9aac0e25 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 25 Feb 2025 14:23:04 +0200 Subject: [PATCH 008/207] Move neon_pgstat_file_size_limit to the extension (#10959) ## Problem PG14 uses separate backend for stats collector having no access to shared memory. As far as AUX mechanism requires access to shared memory, persisting pgstat.stat file is not supported at pg14. And so there is no definition of `neon_pgstat_file_size_limit` variable. It makes it impossible to provide same config for all Postgres versions. ## Summary of changes Move neon_pgstat_file_size_limit to Neon extension.
Postgres submodules PR: https://github.com/neondatabase/postgres/pull/587 https://github.com/neondatabase/postgres/pull/588 https://github.com/neondatabase/postgres/pull/589 --------- Co-authored-by: Konstantin Knizhnik Co-authored-by: Tristan Partin --- pgxn/neon/neon.c | 20 ++++++++++++++++++++ test_runner/regress/test_pgstat.py | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 6 +++--- 6 files changed, 27 insertions(+), 7 deletions(-) diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 700a942284..768d7ae9e8 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -12,6 +12,7 @@ #include "fmgr.h" #include "miscadmin.h" +#include "pgstat.h" #include "access/subtrans.h" #include "access/twophase.h" #include "access/xlog.h" @@ -410,6 +411,16 @@ ReportSearchPath(void) } } +#if PG_VERSION_NUM < 150000 +/* + * PG14 uses separate backend for stats collector having no access to shared memory. + * As far as AUX mechanism requires access to shared memory, persisting pgstat.stat file + * is not supported in PG14. And so there is no definition of neon_pgstat_file_size_limit + * variable, so we have to declare it here. 
+ */ +static int neon_pgstat_file_size_limit; +#endif + void _PG_init(void) { @@ -467,6 +478,15 @@ _PG_init(void) 0, NULL, NULL, NULL); + DefineCustomIntVariable("neon.pgstat_file_size_limit", + "Maximal size of pgstat.stat file saved in Neon storage", + "Zero value disables persisting pgstat.stat file", + &neon_pgstat_file_size_limit, + 0, 0, 1000000, /* disabled by default */ + PGC_SIGHUP, + GUC_UNIT_KB, + NULL, NULL, NULL); + /* * Important: This must happen after other parts of the extension are * loaded, otherwise any settings to GUCs that were set before the diff --git a/test_runner/regress/test_pgstat.py b/test_runner/regress/test_pgstat.py index c31e5ef7f8..bf9b982e14 100644 --- a/test_runner/regress/test_pgstat.py +++ b/test_runner/regress/test_pgstat.py @@ -13,7 +13,7 @@ def test_pgstat(neon_simple_env: NeonEnv): n = 10000 endpoint = env.endpoints.create_start( - "main", config_lines=["neon_pgstat_file_size_limit=100kB", "autovacuum=off"] + "main", config_lines=["neon.pgstat_file_size_limit=100kB", "autovacuum=off"] ) con = endpoint.connect() diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 6ff5044377..9b118b1cff 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 6ff50443773b69749e16da6db9d4f4b19064b4b7 +Subproject commit 9b118b1cffa6e4ca0d63389b57b54d11e207e9a8 diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 261ed10e9b..799e7a08dd 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 261ed10e9b8c8dda01ad7aefb18e944e30aa161d +Subproject commit 799e7a08dd171aa06a7395dd326f4243aaeb9f93 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 59b2fe851f..517b8dc244 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 59b2fe851f8e0595f6c830b90ee766f4f1c17a0f +Subproject commit 517b8dc244abf3e56f0089849e464af76f70b94e diff --git a/vendor/revisions.json b/vendor/revisions.json index f85cec3a0b..8dde46a01e 100644 --- 
a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,15 +1,15 @@ { "v17": [ "17.4", - "59b2fe851f8e0595f6c830b90ee766f4f1c17a0f" + "517b8dc244abf3e56f0089849e464af76f70b94e" ], "v16": [ "16.8", - "261ed10e9b8c8dda01ad7aefb18e944e30aa161d" + "799e7a08dd171aa06a7395dd326f4243aaeb9f93" ], "v15": [ "15.12", - "6ff50443773b69749e16da6db9d4f4b19064b4b7" + "9b118b1cffa6e4ca0d63389b57b54d11e207e9a8" ], "v14": [ "14.17", From f4fefd9f2fbcc06a319d082b00a7868de9084dd3 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Tue, 25 Feb 2025 13:29:27 +0100 Subject: [PATCH 009/207] pre-commit: Switch to cargo fmt to handle per-crate editions (#10969) cargo knows what edition each crate uses. --- pre-commit.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/pre-commit.py b/pre-commit.py index c9567e0c50..09139459d5 100755 --- a/pre-commit.py +++ b/pre-commit.py @@ -29,12 +29,12 @@ def colorify( return f"{color.value}{s}{NC}" -def rustfmt(fix_inplace: bool = False, no_color: bool = False) -> str: - cmd = "rustfmt --edition=2021" +def cargo_fmt(fix_inplace: bool = False, no_color: bool = False) -> str: + cmd = "cargo fmt" if not fix_inplace: cmd += " --check" if no_color: - cmd += " --color=never" + cmd += " -- --color=never" return cmd @@ -61,14 +61,23 @@ def get_commit_files() -> list[str]: return files.decode().splitlines() -def check(name: str, suffix: str, cmd: str, changed_files: list[str], no_color: bool = False): +def check( + name: str, + suffix: str, + cmd: str, + changed_files: list[str], + no_color: bool = False, + append_files_to_cmd: bool = True, +): print(f"Checking: {name} ", end="") applicable_files = list(filter(lambda fname: fname.strip().endswith(suffix), changed_files)) if not applicable_files: print(colorify("[NOT APPLICABLE]", Color.CYAN, no_color)) return - cmd = f'{cmd} {" ".join(applicable_files)}' + if append_files_to_cmd: + cmd = f"{cmd} {' '.join(applicable_files)}" + res = subprocess.run(cmd.split(), 
capture_output=True) if res.returncode != 0: print(colorify("[FAILED]", Color.RED, no_color)) @@ -100,15 +109,13 @@ if __name__ == "__main__": args = parser.parse_args() files = get_commit_files() - # we use rustfmt here because cargo fmt does not accept list of files - # it internally gathers project files and feeds them to rustfmt - # so because we want to check only files included in the commit we use rustfmt directly check( - name="rustfmt", + name="cargo fmt", suffix=".rs", - cmd=rustfmt(fix_inplace=args.fix_inplace, no_color=args.no_color), + cmd=cargo_fmt(fix_inplace=args.fix_inplace, no_color=args.no_color), changed_files=files, no_color=args.no_color, + append_files_to_cmd=False, ) check( name="ruff check", From f78ac4474894664faae2597425e7dc05e7bbf562 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Tue, 25 Feb 2025 13:44:06 +0100 Subject: [PATCH 010/207] Use the Dockerfile COPY instead of docker cp (#10943) ## Problem We use `docker cp` to copy the files required for the extension tests now. It causes problems if we run older images with the newer source tree. ## Summary of changes Copying the files was moved to the compute Dockerfile. 
--- .dockerignore | 1 + compute/compute-node.Dockerfile | 2 +- docker-compose/docker_compose_test.sh | 2 -- docker-compose/test_extensions_upgrade.sh | 1 - 4 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.dockerignore b/.dockerignore index 7ead48db7c..9fafc2e4ba 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,6 +14,7 @@ !compute/ !compute_tools/ !control_plane/ +!docker-compose/ext-src !libs/ !pageserver/ !pgxn/ diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index a74291fdb4..0cdb44853f 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1818,7 +1818,7 @@ RUN make PG_VERSION="${PG_VERSION:?}" -C compute FROM pg-build AS extension-tests ARG PG_VERSION -RUN mkdir /ext-src +COPY docker-compose/ext-src/ /ext-src/ COPY --from=pg-build /postgres /postgres #COPY --from=postgis-src /ext-src/ /ext-src/ diff --git a/docker-compose/docker_compose_test.sh b/docker-compose/docker_compose_test.sh index 5b3cfc74eb..0f03d600a3 100755 --- a/docker-compose/docker_compose_test.sh +++ b/docker-compose/docker_compose_test.sh @@ -51,8 +51,6 @@ for pg_version in ${TEST_VERSION_ONLY-14 15 16 17}; do done if [ $pg_version -ge 16 ]; then - docker cp ext-src $TEST_CONTAINER_NAME:/ - docker exec $TEST_CONTAINER_NAME bash -c "apt update && apt install -y libtap-parser-sourcehandler-pgtap-perl" # This is required for the pg_hint_plan test, to prevent flaky log message causing the test to fail # It cannot be moved to Dockerfile now because the database directory is created after the start of the container echo Adding dummy config diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index 06d351b496..c2168c47af 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -57,7 +57,6 @@ new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regress docker compose --profile test-extensions down 
TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready -docker compose cp ext-src neon-test-extensions:/ docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression" From 8deeddd4f0dcab486d41a8ffdd5d3e1d5da70f30 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 25 Feb 2025 15:49:41 +0100 Subject: [PATCH 011/207] pageserver: ignore `CollectKeySpaceError::Cancelled` during compaction (#10968) This pops up a few times during deployment. Not sure why it fires without `self.cancel` being cancelled, but could be e.g. ancestor timelines or sth. --- pageserver/src/tenant/timeline/compaction.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index d75591bd74..bfb610e0d9 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -779,6 +779,7 @@ impl Timeline { // Suppress errors when cancelled. Err(_) if self.cancel.is_cancelled() => {} Err(CompactionError::ShuttingDown) => {} + Err(CompactionError::CollectKeySpaceError(CollectKeySpaceError::Cancelled)) => {} // Alert on critical errors that indicate data corruption. Err( From b7fcf2c7a7fb47ebc736db7b201cde04995b21c5 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 25 Feb 2025 09:50:22 -0500 Subject: [PATCH 012/207] test(pageserver): add reldir v2 into tests (#10750) ## Problem We have `test_perf_many_relations` but it only runs on remote clusters, and we cannot directly modify tenant config. Therefore, I patched one of the current tests to benchmark relv2 performance. 
close https://github.com/neondatabase/neon/issues/9986 ## Summary of changes * Add `v1/v2` selector to `test_tx_abort_with_many_relations`. --------- Signed-off-by: Alex Chi Z --- .../performance/test_perf_many_relations.py | 51 ++++++++++ test_runner/regress/test_pg_regress.py | 93 ++++++++++++------- 2 files changed, 109 insertions(+), 35 deletions(-) diff --git a/test_runner/performance/test_perf_many_relations.py b/test_runner/performance/test_perf_many_relations.py index 0ee0efe8b9..2570c55f6c 100644 --- a/test_runner/performance/test_perf_many_relations.py +++ b/test_runner/performance/test_perf_many_relations.py @@ -2,8 +2,10 @@ import os from pathlib import Path import pytest +from fixtures.benchmark_fixture import NeonBenchmarker from fixtures.compare_fixtures import RemoteCompare from fixtures.log_helper import log +from fixtures.neon_fixtures import NeonEnvBuilder def get_num_relations(default: int = 1000) -> list[int]: @@ -64,3 +66,52 @@ def test_perf_many_relations(remote_compare: RemoteCompare, num_relations: int): env.pg_bin.run_capture( ["psql", env.pg.connstr(options="-cstatement_timeout=1000s "), "-c", sql] ) + + +def test_perf_simple_many_relations_reldir_v2( + neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker +): + """ + Test creating many relations in a single database. 
+ """ + env = neon_env_builder.init_start(initial_tenant_conf={"rel_size_v2_enabled": "true"}) + ep = env.endpoints.create_start( + "main", + config_lines=[ + "shared_buffers=1000MB", + "max_locks_per_transaction=16384", + ], + ) + + n = 100000 + step = 5000 + # Create many relations + log.info(f"Creating {n} relations...") + begin = 0 + with zenbenchmark.record_duration("create_first_relation"): + ep.safe_psql("CREATE TABLE IF NOT EXISTS table_begin (id SERIAL PRIMARY KEY, data TEXT)") + with zenbenchmark.record_duration("create_many_relations"): + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; + END LOOP; + END $$; + """, + "COMMIT", + ] + ) + begin = end + if begin >= n: + break + with zenbenchmark.record_duration("create_last_relation"): + ep.safe_psql(f"CREATE TABLE IF NOT EXISTS table_{begin} (id SERIAL PRIMARY KEY, data TEXT)") diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 411888efbc..afc7ef3e01 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -332,8 +332,10 @@ def test_sql_regress( @skip_in_debug_build("only run with release build") +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_tx_abort_with_many_relations( neon_env_builder: NeonEnvBuilder, + reldir_type: str, ): """ This is not a pg_regress test as such, but perhaps it should be -- this test exercises postgres @@ -342,7 +344,11 @@ def test_tx_abort_with_many_relations( Reproducer for https://github.com/neondatabase/neon/issues/9505 """ - env = neon_env_builder.init_start() + env = neon_env_builder.init_start( + initial_tenant_conf={ + "rel_size_v2_enabled": "true" if reldir_type == "v2" else "false", + } + ) ep = env.endpoints.create_start( "main", 
tenant_id=env.initial_tenant, @@ -354,48 +360,65 @@ def test_tx_abort_with_many_relations( # How many relations: this number is tuned to be long enough to take tens of seconds # if the rollback code path is buggy, tripping the test's timeout. - n = 4000 + if reldir_type == "v1": + n = 4000 + step = 4000 + else: + n = 100000 + step = 5000 def create(): # Create many relations log.info(f"Creating {n} relations...") - ep.safe_psql_many( - [ - "BEGIN", - f"""DO $$ - DECLARE - i INT; - table_name TEXT; - BEGIN - FOR i IN 1..{n} LOOP - table_name := 'table_' || i; - EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; - END LOOP; - END $$; - """, - "COMMIT", - ] - ) + begin = 0 + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'CREATE TABLE IF NOT EXISTS ' || table_name || ' (id SERIAL PRIMARY KEY, data TEXT)'; + END LOOP; + END $$; + """, + "COMMIT", + ] + ) + begin = end + if begin >= n: + break def truncate(): # Truncate relations, then roll back the transaction containing the truncations log.info(f"Truncating {n} relations...") - ep.safe_psql_many( - [ - "BEGIN", - f"""DO $$ - DECLARE - i INT; - table_name TEXT; - BEGIN - FOR i IN 1..{n} LOOP - table_name := 'table_' || i; - EXECUTE 'TRUNCATE ' || table_name ; - END LOOP; - END $$; - """, - ] - ) + begin = 0 + while True: + end = begin + step + ep.safe_psql_many( + [ + "BEGIN", + f"""DO $$ + DECLARE + i INT; + table_name TEXT; + BEGIN + FOR i IN {begin}..{end} LOOP + table_name := 'table_' || i; + EXECUTE 'TRUNCATE ' || table_name ; + END LOOP; + END $$; + """, + ] + ) + begin = end + if begin >= n: + break def rollback_and_wait(): log.info(f"Rolling back after truncating {n} relations...") From 015092d259b517f11ce98b2d19a9d3e9df3a633e Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Tue, 25 Feb 2025 09:50:39 -0500 Subject: [PATCH 013/207] feat(pageserver): add automatic trigger for gc-compaction (#10798) ## Problem part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes Add the auto trigger for gc-compaction. It computes two values: L1 size and L2 size. When L1 size >= initial trigger threshold, we will trigger an initial gc-compaction. When l1_size / l2_size >= gc_compaction_ratio_percent, we will trigger the "tiered" gc-compaction. --------- Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/config.rs | 2 +- pageserver/src/http/routes.rs | 3 +- pageserver/src/tenant.rs | 33 +- .../src/tenant/remote_timeline_client.rs | 13 + .../tenant/remote_timeline_client/index.rs | 59 +++- pageserver/src/tenant/tasks.rs | 1 + pageserver/src/tenant/timeline.rs | 51 +++- pageserver/src/tenant/timeline/compaction.rs | 289 +++++++++++++++--- pageserver/src/tenant/timeline/delete.rs | 6 + pageserver/src/tenant/timeline/offload.rs | 7 + test_runner/regress/test_compaction.py | 53 ++++ 11 files changed, 446 insertions(+), 71 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 1aff5a7012..5a695c04ed 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -584,7 +584,7 @@ pub mod tenant_conf_defaults { // image layers should be created. 
pub const DEFAULT_IMAGE_LAYER_CREATION_CHECK_THRESHOLD: u8 = 2; pub const DEFAULT_GC_COMPACTION_ENABLED: bool = false; - pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 10240000; + pub const DEFAULT_GC_COMPACTION_INITIAL_THRESHOLD_KB: u64 = 5 * 1024 * 1024; // 5GB pub const DEFAULT_GC_COMPACTION_RATIO_PERCENT: u64 = 100; } diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 56a84a98a8..9f37fc32a3 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2396,7 +2396,8 @@ async fn timeline_checkpoint_handler( CompactionError::ShuttingDown => ApiError::ShuttingDown, CompactionError::Offload(e) => ApiError::InternalServerError(anyhow::anyhow!(e)), CompactionError::CollectKeySpaceError(e) => ApiError::InternalServerError(anyhow::anyhow!(e)), - CompactionError::Other(e) => ApiError::InternalServerError(e) + CompactionError::Other(e) => ApiError::InternalServerError(e), + CompactionError::AlreadyRunning(_) => ApiError::InternalServerError(anyhow::anyhow!(e)), } )?; } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 56718f5294..46f9c9a427 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -34,6 +34,7 @@ use pageserver_api::shard::TenantShardId; use remote_storage::DownloadError; use remote_storage::GenericRemoteStorage; use remote_storage::TimeoutOrCancel; +use remote_timeline_client::index::GcCompactionState; use remote_timeline_client::manifest::{ OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION, }; @@ -1168,6 +1169,7 @@ impl Tenant { resources, CreateTimelineCause::Load, idempotency.clone(), + index_part.gc_compaction.clone(), )?; let disk_consistent_lsn = timeline.get_disk_consistent_lsn(); anyhow::ensure!( @@ -3125,20 +3127,19 @@ impl Tenant { // If we're done compacting, check the scheduled GC compaction queue for more work. 
if outcome == CompactionOutcome::Done { - let queue = self - .scheduled_compaction_tasks - .lock() - .unwrap() - .get(&timeline.timeline_id) - .cloned(); - if let Some(queue) = queue { - outcome = queue - .iteration(cancel, ctx, &self.gc_block, &timeline) - .instrument( - info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id), - ) - .await?; - } + let queue = { + let mut guard = self.scheduled_compaction_tasks.lock().unwrap(); + guard + .entry(timeline.timeline_id) + .or_insert_with(|| Arc::new(GcCompactionQueue::new())) + .clone() + }; + outcome = queue + .iteration(cancel, ctx, &self.gc_block, &timeline) + .instrument( + info_span!("gc_compact_timeline", timeline_id = %timeline.timeline_id), + ) + .await?; } // If we're done compacting, offload the timeline if requested. @@ -3195,6 +3196,7 @@ impl Tenant { .unwrap() .fail(&CIRCUIT_BREAKERS_BROKEN, err); } + CompactionError::AlreadyRunning(_) => {} } } @@ -4150,6 +4152,7 @@ impl Tenant { resources: TimelineResources, cause: CreateTimelineCause, create_idempotency: CreateTimelineIdempotency, + gc_compaction_state: Option, ) -> anyhow::Result> { let state = match cause { CreateTimelineCause::Load => { @@ -4181,6 +4184,7 @@ impl Tenant { state, self.attach_wal_lag_cooldown.clone(), create_idempotency, + gc_compaction_state, self.cancel.child_token(), ); @@ -5246,6 +5250,7 @@ impl Tenant { resources, CreateTimelineCause::Load, create_guard.idempotency.clone(), + None, ) .context("Failed to create timeline data structure")?; diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 713efbb9a4..e01da48052 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -184,6 +184,7 @@ use camino::Utf8Path; use chrono::{NaiveDateTime, Utc}; pub(crate) use download::download_initdb_tar_zst; +use index::GcCompactionState; use pageserver_api::models::TimelineArchivalState; use 
pageserver_api::shard::{ShardIndex, TenantShardId}; use regex::Regex; @@ -913,6 +914,18 @@ impl RemoteTimelineClient { Ok(()) } + /// Launch an index-file upload operation in the background, setting `import_pgdata` field. + pub(crate) fn schedule_index_upload_for_gc_compaction_state_update( + self: &Arc, + gc_compaction_state: GcCompactionState, + ) -> anyhow::Result<()> { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = guard.initialized_mut()?; + upload_queue.dirty.gc_compaction = Some(gc_compaction_state); + self.schedule_index_upload(upload_queue); + Ok(()) + } + /// /// Launch an index-file upload operation in the background, if necessary. /// diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index b8b18005fd..727b25fbf4 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -85,9 +85,36 @@ pub struct IndexPart { #[serde(skip_serializing_if = "Option::is_none", default)] pub(crate) rel_size_migration: Option, - /// The LSN of gc-compaction horizon. Once gc-compaction is finished for all layer files below an LSN, this LSN will be updated. + /// Not used anymore -- kept here for backwards compatibility. Merged into the `gc_compaction` field. #[serde(skip_serializing_if = "Option::is_none", default)] - pub(crate) l2_lsn: Option, + l2_lsn: Option, + + /// State for the garbage-collecting compaction pass. + /// + /// Garbage-collecting compaction (gc-compaction) prunes `Value`s that are outside + /// the PITR window and not needed by child timelines. + /// + /// A commonly used synonym for this compaction pass is + /// "bottommost-compaction" because the affected LSN range + /// is the "bottom" of the (key,lsn) map. + /// + /// Gc-compaction is a quite expensive operation; that's why we use + /// trigger condition. 
+ /// This field here holds the state pertaining to that trigger condition + /// and (in future) to the progress of the gc-compaction, so that it's + /// resumable across restarts & migrations. + /// + /// Note that the underlying algorithm is _also_ called `gc-compaction` + /// in most places & design docs; but in fact it is more flexible than + /// just the specific use case here; it needs a new name. + #[serde(skip_serializing_if = "Option::is_none", default)] + pub(crate) gc_compaction: Option, +} + +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct GcCompactionState { + /// The upper bound of the last completed garbage-collecting compaction, aka. L2 LSN. + pub(crate) last_completed_lsn: Lsn, } #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] @@ -123,10 +150,11 @@ impl IndexPart { /// - 10: +import_pgdata /// - 11: +rel_size_migration /// - 12: +l2_lsn - const LATEST_VERSION: usize = 12; + /// - 13: +gc_compaction + const LATEST_VERSION: usize = 13; // Versions we may see when reading from a bucket. 
- pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + pub const KNOWN_VERSIONS: &'static [usize] = &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]; pub const FILE_NAME: &'static str = "index_part.json"; @@ -144,6 +172,7 @@ impl IndexPart { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, } } @@ -450,6 +479,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -497,6 +527,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -545,6 +576,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -596,6 +628,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let empty_layers_parsed = IndexPart::from_json_bytes(empty_layers_json.as_bytes()).unwrap(); @@ -642,6 +675,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -691,6 +725,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -745,6 +780,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -804,6 +840,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -864,6 +901,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, 
}; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -929,6 +967,7 @@ mod tests { import_pgdata: None, rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1007,6 +1046,7 @@ mod tests { }))), rel_size_migration: None, l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1086,6 +1126,7 @@ mod tests { }))), rel_size_migration: Some(RelSizeMigration::Legacy), l2_lsn: None, + gc_compaction: None, }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); @@ -1093,7 +1134,7 @@ mod tests { } #[test] - fn v12_l2_lsn_is_parsed() { + fn v12_v13_l2_gc_ompaction_is_parsed() { let example = r#"{ "version": 12, "layer_metadata":{ @@ -1124,7 +1165,10 @@ mod tests { } }, "rel_size_migration": "legacy", - "l2_lsn": "0/16960E8" + "l2_lsn": "0/16960E8", + "gc_compaction": { + "last_completed_lsn": "0/16960E8" + } }"#; let expected = IndexPart { @@ -1166,6 +1210,9 @@ mod tests { }))), rel_size_migration: Some(RelSizeMigration::Legacy), l2_lsn: Some("0/16960E8".parse::().unwrap()), + gc_compaction: Some(GcCompactionState { + last_completed_lsn: "0/16960E8".parse::().unwrap(), + }), }; let part = IndexPart::from_json_bytes(example.as_bytes()).unwrap(); diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 5e63f59fd8..b12655b0f3 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -295,6 +295,7 @@ fn log_compaction_error( let level = match err { ShuttingDown => return, Offload(_) => Level::ERROR, + AlreadyRunning(_) => Level::ERROR, CollectKeySpaceError(CollectKeySpaceError::Cancelled) => Level::INFO, CollectKeySpaceError(_) => Level::ERROR, _ if task_cancelled => Level::INFO, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 319c5e3d87..a80d407d54 100644 --- a/pageserver/src/tenant/timeline.rs +++ 
b/pageserver/src/tenant/timeline.rs @@ -19,7 +19,7 @@ use arc_swap::{ArcSwap, ArcSwapOption}; use bytes::Bytes; use camino::Utf8Path; use chrono::{DateTime, Utc}; -use compaction::CompactionOutcome; +use compaction::{CompactionOutcome, GcCompactionCombinedSettings}; use enumset::EnumSet; use fail::fail_point; use futures::FutureExt; @@ -148,6 +148,7 @@ use self::layer_manager::LayerManager; use self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; +use super::remote_timeline_client::index::GcCompactionState; use super::{ config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, MaybeOffloaded, @@ -323,6 +324,9 @@ pub struct Timeline { ancestor_timeline: Option>, ancestor_lsn: Lsn, + // The LSN of gc-compaction that was last applied to this timeline. + gc_compaction_state: ArcSwap>, + pub(super) metrics: TimelineMetrics, // `Timeline` doesn't write these metrics itself, but it manages the lifetime. Code @@ -1889,6 +1893,7 @@ impl Timeline { // abruptly stall nor resume L0 flushes in these cases. 
Err(CompactionError::Offload(_)) => {} Err(CompactionError::ShuttingDown) => {} + Err(CompactionError::AlreadyRunning(_)) => {} }; result @@ -2531,6 +2536,31 @@ impl Timeline { ) } + fn get_gc_compaction_settings(&self) -> GcCompactionCombinedSettings { + let tenant_conf = &self.tenant_conf.load(); + let gc_compaction_enabled = tenant_conf + .tenant_conf + .gc_compaction_enabled + .unwrap_or(self.conf.default_tenant_conf.gc_compaction_enabled); + let gc_compaction_initial_threshold_kb = tenant_conf + .tenant_conf + .gc_compaction_initial_threshold_kb + .unwrap_or( + self.conf + .default_tenant_conf + .gc_compaction_initial_threshold_kb, + ); + let gc_compaction_ratio_percent = tenant_conf + .tenant_conf + .gc_compaction_ratio_percent + .unwrap_or(self.conf.default_tenant_conf.gc_compaction_ratio_percent); + GcCompactionCombinedSettings { + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + } + } + fn get_image_creation_preempt_threshold(&self) -> usize { let tenant_conf = self.tenant_conf.load(); tenant_conf @@ -2609,6 +2639,7 @@ impl Timeline { state: TimelineState, attach_wal_lag_cooldown: Arc>, create_idempotency: crate::tenant::CreateTimelineIdempotency, + gc_compaction_state: Option, cancel: CancellationToken, ) -> Arc { let disk_consistent_lsn = metadata.disk_consistent_lsn(); @@ -2667,6 +2698,8 @@ impl Timeline { }), disk_consistent_lsn: AtomicLsn::new(disk_consistent_lsn.0), + gc_compaction_state: ArcSwap::new(Arc::new(gc_compaction_state)), + last_freeze_at: AtomicLsn::new(disk_consistent_lsn.0), last_freeze_ts: RwLock::new(Instant::now()), @@ -2831,6 +2864,20 @@ impl Timeline { ); } + pub(crate) fn update_gc_compaction_state( + &self, + gc_compaction_state: GcCompactionState, + ) -> anyhow::Result<()> { + self.gc_compaction_state + .store(Arc::new(Some(gc_compaction_state.clone()))); + self.remote_client + .schedule_index_upload_for_gc_compaction_state_update(gc_compaction_state) + } + + pub(crate) fn 
get_gc_compaction_state(&self) -> Option { + self.gc_compaction_state.load_full().as_ref().clone() + } + /// Creates and starts the wal receiver. /// /// This function is expected to be called at most once per Timeline's lifecycle @@ -5373,6 +5420,8 @@ pub(crate) enum CompactionError { CollectKeySpaceError(CollectKeySpaceError), #[error(transparent)] Other(anyhow::Error), + #[error("Compaction already running: {0}")] + AlreadyRunning(&'static str), } impl From for CompactionError { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index bfb610e0d9..c6ef5165ef 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -20,11 +20,13 @@ use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; +use once_cell::sync::Lazy; use pageserver_api::key::KEY_SIZE; use pageserver_api::keyspace::ShardedRange; use pageserver_api::models::CompactInfoResponse; use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId}; use serde::Serialize; +use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, info_span, trace, warn, Instrument}; use utils::critical; @@ -37,6 +39,7 @@ use crate::statvfs::Statvfs; use crate::tenant::checks::check_valid_layermap; use crate::tenant::gc_block::GcBlock; use crate::tenant::layer_map::LayerMap; +use crate::tenant::remote_timeline_client::index::GcCompactionState; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, @@ -77,13 +80,22 @@ impl std::fmt::Display for GcCompactionJobId { } } +pub struct GcCompactionCombinedSettings { + pub gc_compaction_enabled: bool, + pub gc_compaction_initial_threshold_kb: u64, + pub gc_compaction_ratio_percent: u64, +} + #[derive(Debug, Clone)] pub enum GcCompactionQueueItem { 
- Manual(CompactOptions), + MetaJob { + /// Compaction options + options: CompactOptions, + /// Whether the compaction is triggered automatically (determines whether we need to update L2 LSN) + auto: bool, + }, SubCompactionJob(CompactOptions), - #[allow(dead_code)] - UpdateL2Lsn(Lsn), - Notify(GcCompactionJobId), + Notify(GcCompactionJobId, Option), } impl GcCompactionQueueItem { @@ -93,7 +105,7 @@ impl GcCompactionQueueItem { running: bool, ) -> Option { match self { - GcCompactionQueueItem::Manual(options) => Some(CompactInfoResponse { + GcCompactionQueueItem::MetaJob { options, .. } => Some(CompactInfoResponse { compact_key_range: options.compact_key_range, compact_lsn_range: options.compact_lsn_range, sub_compaction: options.sub_compaction, @@ -107,17 +119,22 @@ impl GcCompactionQueueItem { running, job_id: id.0, }), - GcCompactionQueueItem::UpdateL2Lsn(_) => None, - GcCompactionQueueItem::Notify(_) => None, + GcCompactionQueueItem::Notify(_, _) => None, } } } +#[derive(Default)] +struct GcCompactionGuardItems { + notify: Option>, + gc_guard: Option, + permit: Option, +} + struct GcCompactionQueueInner { running: Option<(GcCompactionJobId, GcCompactionQueueItem)>, queued: VecDeque<(GcCompactionJobId, GcCompactionQueueItem)>, - notify: HashMap>, - gc_guards: HashMap, + guards: HashMap, last_id: GcCompactionJobId, } @@ -137,14 +154,18 @@ pub struct GcCompactionQueue { consumer_lock: tokio::sync::Mutex<()>, } +static CONCURRENT_GC_COMPACTION_TASKS: Lazy> = Lazy::new(|| { + // Only allow two timelines on one pageserver to run gc compaction at a time. 
+ Arc::new(Semaphore::new(2)) +}); + impl GcCompactionQueue { pub fn new() -> Self { GcCompactionQueue { inner: std::sync::Mutex::new(GcCompactionQueueInner { running: None, queued: VecDeque::new(), - notify: HashMap::new(), - gc_guards: HashMap::new(), + guards: HashMap::new(), last_id: GcCompactionJobId(0), }), consumer_lock: tokio::sync::Mutex::new(()), @@ -154,8 +175,9 @@ impl GcCompactionQueue { pub fn cancel_scheduled(&self) { let mut guard = self.inner.lock().unwrap(); guard.queued.clear(); - guard.notify.clear(); - guard.gc_guards.clear(); + // TODO: if there is a running job, we should keep the gc guard. However, currently, the cancel + // API is only used for testing purposes, so we can drop everything here. + guard.guards.clear(); } /// Schedule a manual compaction job. @@ -166,29 +188,162 @@ impl GcCompactionQueue { ) -> GcCompactionJobId { let mut guard = self.inner.lock().unwrap(); let id = guard.next_id(); - guard - .queued - .push_back((id, GcCompactionQueueItem::Manual(options))); - if let Some(notify) = notify { - guard.notify.insert(id, notify); - } + guard.queued.push_back(( + id, + GcCompactionQueueItem::MetaJob { + options, + auto: false, + }, + )); + guard.guards.entry(id).or_default().notify = notify; info!("scheduled compaction job id={}", id); id } + /// Schedule an auto compaction job. + fn schedule_auto_compaction( + &self, + options: CompactOptions, + permit: OwnedSemaphorePermit, + ) -> GcCompactionJobId { + let mut guard = self.inner.lock().unwrap(); + let id = guard.next_id(); + guard.queued.push_back(( + id, + GcCompactionQueueItem::MetaJob { + options, + auto: true, + }, + )); + guard.guards.entry(id).or_default().permit = Some(permit); + id + } + /// Trigger an auto compaction. 
- #[allow(dead_code)] - pub fn trigger_auto_compaction(&self, _: &Arc) {} + pub async fn trigger_auto_compaction(&self, timeline: &Arc) { + let GcCompactionCombinedSettings { + gc_compaction_enabled, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + } = timeline.get_gc_compaction_settings(); + if !gc_compaction_enabled { + return; + } + if self.remaining_jobs_num() > 0 { + // Only schedule auto compaction when the queue is empty + return; + } + if timeline.ancestor_timeline().is_some() { + // Do not trigger auto compaction for child timelines. We haven't tested + // it enough in staging yet. + return; + } + + let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else { + // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure + // the fairness of the lock across timelines. We should listen for both `acquire` and `l0_compaction_trigger` + // to ensure the fairness while avoid starving other tasks. + return; + }; + + let gc_compaction_state = timeline.get_gc_compaction_state(); + let l2_lsn = gc_compaction_state + .map(|x| x.last_completed_lsn) + .unwrap_or(Lsn::INVALID); + + let layers = { + let guard = timeline.layers.read().await; + let layer_map = guard.layer_map().unwrap(); + layer_map.iter_historic_layers().collect_vec() + }; + let mut l2_size: u64 = 0; + let mut l1_size = 0; + let gc_cutoff = *timeline.get_applied_gc_cutoff_lsn(); + for layer in layers { + if layer.lsn_range.start <= l2_lsn { + l2_size += layer.file_size(); + } else if layer.lsn_range.start <= gc_cutoff { + l1_size += layer.file_size(); + } + } + + fn trigger_compaction( + l1_size: u64, + l2_size: u64, + gc_compaction_initial_threshold_kb: u64, + gc_compaction_ratio_percent: u64, + ) -> bool { + const AUTO_TRIGGER_LIMIT: u64 = 150 * 1024 * 1024 * 1024; // 150GB + if l1_size >= AUTO_TRIGGER_LIMIT || l2_size >= AUTO_TRIGGER_LIMIT { + // Do not auto-trigger when physical size >= 150GB + return false; + } + // 
initial trigger + if l2_size == 0 && l1_size >= gc_compaction_initial_threshold_kb * 1024 { + info!( + "trigger auto-compaction because l1_size={} >= gc_compaction_initial_threshold_kb={}", + l1_size, + gc_compaction_initial_threshold_kb + ); + return true; + } + // size ratio trigger + if l2_size == 0 { + return false; + } + if l1_size as f64 / l2_size as f64 >= (gc_compaction_ratio_percent as f64 / 100.0) { + info!( + "trigger auto-compaction because l1_size={} / l2_size={} > gc_compaction_ratio_percent={}", + l1_size, + l2_size, + gc_compaction_ratio_percent + ); + return true; + } + false + } + + if trigger_compaction( + l1_size, + l2_size, + gc_compaction_initial_threshold_kb, + gc_compaction_ratio_percent, + ) { + self.schedule_auto_compaction( + CompactOptions { + flags: { + let mut flags = EnumSet::new(); + flags |= CompactFlags::EnhancedGcBottomMostCompaction; + flags + }, + sub_compaction: true, + compact_key_range: None, + compact_lsn_range: None, + sub_compaction_max_job_size_mb: None, + }, + permit, + ); + info!( + "scheduled auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}", + l1_size, l2_size, l2_lsn, gc_cutoff + ); + } else { + info!( + "did not trigger auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}", + l1_size, l2_size, l2_lsn, gc_cutoff + ); + } + } /// Notify the caller the job has finished and unblock GC. 
fn notify_and_unblock(&self, id: GcCompactionJobId) { info!("compaction job id={} finished", id); let mut guard = self.inner.lock().unwrap(); - if let Some(blocking) = guard.gc_guards.remove(&id) { - drop(blocking) - } - if let Some(tx) = guard.notify.remove(&id) { - let _ = tx.send(()); + if let Some(items) = guard.guards.remove(&id) { + drop(items.gc_guard); + if let Some(tx) = items.notify { + let _ = tx.send(()); + } } } @@ -198,9 +353,10 @@ impl GcCompactionQueue { options: CompactOptions, timeline: &Arc, gc_block: &GcBlock, + auto: bool, ) -> Result<(), CompactionError> { info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); - let jobs: Vec = timeline + let jobs = timeline .gc_compaction_split_jobs( GcCompactJob::from_compact_options(options.clone()), options.sub_compaction_max_job_size_mb, @@ -223,6 +379,9 @@ impl GcCompactionQueue { let jobs_len = jobs.len(); let mut pending_tasks = Vec::new(); + // gc-compaction might pick more layers or fewer layers to compact. The L2 LSN does not need to be accurate. + // And therefore, we simply assume the maximum LSN of all jobs is the expected L2 LSN. + let expected_l2_lsn = jobs.iter().map(|job| job.compact_lsn_range.end).max(); for job in jobs { // Unfortunately we need to convert the `GcCompactJob` back to `CompactionOptions` // until we do further refactors to allow directly call `compact_with_gc`. 
@@ -240,10 +399,16 @@ impl GcCompactionQueue { }; pending_tasks.push(GcCompactionQueueItem::SubCompactionJob(options)); } - pending_tasks.push(GcCompactionQueueItem::Notify(id)); + + if !auto { + pending_tasks.push(GcCompactionQueueItem::Notify(id, None)); + } else { + pending_tasks.push(GcCompactionQueueItem::Notify(id, expected_l2_lsn)); + } + { let mut guard = self.inner.lock().unwrap(); - guard.gc_guards.insert(id, gc_guard); + guard.guards.entry(id).or_default().gc_guard = Some(gc_guard); let mut tasks = Vec::new(); for task in pending_tasks { let id = guard.next_id(); @@ -267,29 +432,41 @@ impl GcCompactionQueue { gc_block: &GcBlock, timeline: &Arc, ) -> Result { - let _one_op_at_a_time_guard = self.consumer_lock.lock().await; - let has_pending_tasks; - let (id, item) = { - let mut guard = self.inner.lock().unwrap(); - let Some((id, item)) = guard.queued.pop_front() else { - return Ok(CompactionOutcome::Done); - }; - guard.running = Some((id, item.clone())); - has_pending_tasks = !guard.queued.is_empty(); - (id, item) + let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else { + return Err(CompactionError::AlreadyRunning("cannot run gc-compaction because another gc-compaction is running. This should not happen because we only call this function from the gc-compaction queue.")); + }; + let has_pending_tasks; + let Some((id, item)) = ({ + let mut guard = self.inner.lock().unwrap(); + if let Some((id, item)) = guard.queued.pop_front() { + guard.running = Some((id, item.clone())); + has_pending_tasks = !guard.queued.is_empty(); + Some((id, item)) + } else { + has_pending_tasks = false; + None + } + }) else { + self.trigger_auto_compaction(timeline).await; + // Always yield after triggering auto-compaction. Gc-compaction is a low-priority task and we + // have not implemented preemption mechanism yet. We always want to yield it to more important + // tasks if there is one. 
+ return Ok(CompactionOutcome::Done); }; - match item { - GcCompactionQueueItem::Manual(options) => { + GcCompactionQueueItem::MetaJob { options, auto } => { if !options .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) { warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", options); } else if options.sub_compaction { - self.handle_sub_compaction(id, options, timeline, gc_block) + info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + self.handle_sub_compaction(id, options, timeline, gc_block, auto) .await?; } else { + // Auto compaction always enables sub-compaction so we don't need to handle update_l2_lsn + // in this branch. let gc_guard = match gc_block.start().await { Ok(guard) => guard, Err(e) => { @@ -301,20 +478,37 @@ impl GcCompactionQueue { }; { let mut guard = self.inner.lock().unwrap(); - guard.gc_guards.insert(id, gc_guard); + guard.guards.entry(id).or_default().gc_guard = Some(gc_guard); } let _ = timeline.compact_with_options(cancel, options, ctx).await?; self.notify_and_unblock(id); } } GcCompactionQueueItem::SubCompactionJob(options) => { + // TODO: error handling, clear the queue if any task fails? 
let _ = timeline.compact_with_options(cancel, options, ctx).await?; } - GcCompactionQueueItem::Notify(id) => { + GcCompactionQueueItem::Notify(id, l2_lsn) => { self.notify_and_unblock(id); - } - GcCompactionQueueItem::UpdateL2Lsn(_) => { - unreachable!() + if let Some(l2_lsn) = l2_lsn { + let current_l2_lsn = timeline + .get_gc_compaction_state() + .map(|x| x.last_completed_lsn) + .unwrap_or(Lsn::INVALID); + if l2_lsn >= current_l2_lsn { + info!("l2_lsn updated to {}", l2_lsn); + timeline + .update_gc_compaction_state(GcCompactionState { + last_completed_lsn: l2_lsn, + }) + .map_err(CompactionError::Other)?; + } else { + warn!( + "l2_lsn updated to {} but it is less than the current l2_lsn {}", + l2_lsn, current_l2_lsn + ); + } + } } } { @@ -339,7 +533,6 @@ impl GcCompactionQueue { (guard.running.clone(), guard.queued.clone()) } - #[allow(dead_code)] pub fn remaining_jobs_num(&self) -> usize { let guard = self.inner.lock().unwrap(); guard.queued.len() + if guard.running.is_some() { 1 } else { 0 } diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 841b2fa1c7..f4ae1ea166 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -137,6 +137,11 @@ async fn remove_maybe_offloaded_timeline_from_tenant( timelines.remove(&timeline.timeline_id).expect( "timeline that we were deleting was concurrently removed from 'timelines' map", ); + tenant + .scheduled_compaction_tasks + .lock() + .unwrap() + .remove(&timeline.timeline_id); } TimelineOrOffloaded::Offloaded(timeline) => { let offloaded_timeline = timelines_offloaded @@ -300,6 +305,7 @@ impl DeleteTimelineFlow { // Thus we need to skip the validation here. 
CreateTimelineCause::Delete, crate::tenant::CreateTimelineIdempotency::FailWithConflict, // doesn't matter what we put here + None, // doesn't matter what we put here ) .context("create_timeline_struct")?; diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 93e5a1100d..424a75005d 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -143,5 +143,12 @@ fn remove_timeline_from_tenant( .remove(&timeline.timeline_id) .expect("timeline that we were deleting was concurrently removed from 'timelines' map"); + // Clear the compaction queue for this timeline + tenant + .scheduled_compaction_tasks + .lock() + .unwrap() + .remove(&timeline.timeline_id); + Arc::strong_count(&timeline) } diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index c091cd0869..ce8ed3c7c5 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -466,6 +466,59 @@ def test_pageserver_gc_compaction_interrupt(neon_env_builder: NeonEnvBuilder): ps_http.timeline_gc(tenant_id, timeline_id, None) +@skip_in_debug_build("only run with release build") +def test_pageserver_gc_compaction_trigger(neon_env_builder: NeonEnvBuilder): + SMOKE_CONF = { + # Run both gc and gc-compaction. 
+ "gc_period": "5s", + "compaction_period": "5s", + # No PiTR interval and small GC horizon + "pitr_interval": "0s", + "gc_horizon": f"{1024 * 16}", + "lsn_lease_length": "0s", + "gc_compaction_enabled": "true", + "gc_compaction_initial_threshold_kb": "16", + "gc_compaction_ratio_percent": "50", + # Do not generate image layers with create_image_layers + "image_layer_creation_check_threshold": "100", + } + + env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + row_count = 10000 + churn_rounds = 20 + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(row_count, env.pageserver.id) + + ps_http.timeline_gc( + tenant_id, timeline_id, None + ) # Force refresh gc info to have gc_cutoff generated + + def compaction_finished(): + queue_depth = len(ps_http.timeline_compact_info(tenant_id, timeline_id)) + assert queue_depth == 0 + + for i in range(1, churn_rounds + 1): + log.info(f"Running churn round {i}/{churn_rounds} ...") + workload.churn_rows(row_count, env.pageserver.id, upload=True) + wait_until(compaction_finished, timeout=60) + workload.validate(env.pageserver.id) + + # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) + env.pageserver.assert_log_contains("gc_compact_timeline.*picked .* layers for compaction") + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. 
TINY_STRIPES = 16 LARGE_STRIPES = 32768 From 1fb2faab5bc87c41bef086975dbc145ec77dbc1f Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Tue, 25 Feb 2025 17:00:43 +0100 Subject: [PATCH 014/207] Rename the patch files for the semver test (#10966) ## Problem The patch for `semver` extensions relies on the `PG_VERSION` environment variable. The files were named without the letter `v` so the script cannot find them. ## Summary of changes The patch files were renamed. --- .../{test-upgrade-16.patch => test-upgrade-v16.patch} | 0 .../{test-upgrade-17.patch => test-upgrade-v17.patch} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docker-compose/ext-src/pg_semver-src/{test-upgrade-16.patch => test-upgrade-v16.patch} (100%) rename docker-compose/ext-src/pg_semver-src/{test-upgrade-17.patch => test-upgrade-v17.patch} (100%) diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch similarity index 100% rename from docker-compose/ext-src/pg_semver-src/test-upgrade-16.patch rename to docker-compose/ext-src/pg_semver-src/test-upgrade-v16.patch diff --git a/docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch b/docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch similarity index 100% rename from docker-compose/ext-src/pg_semver-src/test-upgrade-17.patch rename to docker-compose/ext-src/pg_semver-src/test-upgrade-v17.patch From c69ebb4486d6678500ebc13282f48b0525547beb Mon Sep 17 00:00:00 2001 From: "Alex Chi Z."
<4198311+skyzh@users.noreply.github.com> Date: Tue, 25 Feb 2025 12:37:23 -0500 Subject: [PATCH 015/207] fix(ci): extend timeout to 75min (#10963) 60min is not enough for debug builds Signed-off-by: Alex Chi Z --- .github/workflows/_build-and-test-locally.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 3740e6dc9c..30fde127b0 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -337,7 +337,7 @@ jobs: - name: Pytest regression tests continue-on-error: ${{ matrix.lfc_state == 'with-lfc' && inputs.build-type == 'debug' }} uses: ./.github/actions/run-python-test-set - timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 60 || 180 }} + timeout-minutes: ${{ inputs.sanitizers != 'enabled' && 75 || 180 }} with: build_type: ${{ inputs.build-type }} test_selection: regress From d05606252d4d899ad9d779527d41e4f855c2e3b0 Mon Sep 17 00:00:00 2001 From: Suhas Thalanki <54014218+thesuhas@users.noreply.github.com> Date: Tue, 25 Feb 2025 14:26:14 -0500 Subject: [PATCH 016/207] fix: only showing LSN for static computes in `neon endpoint list` (#10931) ## Problem `neon endpoint list` shows a different LSN than what the state of the replica is. This is mainly down to what we define as LSN in this output. If we define it as the LSN that a compute was started with, it only makes sense to show it for static computes. ## Summary of changes Removed the output of `last_record_lsn` for primary/hot standby computes. 
Closes: https://github.com/neondatabase/neon/issues/5825 --------- Co-authored-by: Tristan Partin --- control_plane/src/bin/neon_local.rs | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 02d793400a..7d908ccae9 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -887,20 +887,6 @@ fn print_timeline( Ok(()) } -/// Returns a map of timeline IDs to timeline_id@lsn strings. -/// Connects to the pageserver to query this information. -async fn get_timeline_infos( - env: &local_env::LocalEnv, - tenant_shard_id: &TenantShardId, -) -> Result> { - Ok(get_default_pageserver(env) - .timeline_list(tenant_shard_id) - .await? - .into_iter() - .map(|timeline_info| (timeline_info.timeline_id, timeline_info)) - .collect()) -} - /// Helper function to get tenant id from an optional --tenant_id option or from the config file fn get_tenant_id( tenant_id_arg: Option, @@ -1251,12 +1237,6 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res // TODO(sharding): this command shouldn't have to specify a shard ID: we should ask the storage controller // where shard 0 is attached, and query there. let tenant_shard_id = get_tenant_shard_id(args.tenant_shard_id, env)?; - let timeline_infos = get_timeline_infos(env, &tenant_shard_id) - .await - .unwrap_or_else(|e| { - eprintln!("Failed to load timeline info: {}", e); - HashMap::new() - }); let timeline_name_mappings = env.timeline_name_mappings(); @@ -1285,12 +1265,9 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res lsn.to_string() } _ => { - // -> primary endpoint or hot replica - // Use the LSN at the end of the timeline. 
timeline_infos - .get(&endpoint.timeline_id) - .map(|bi| bi.last_record_lsn.to_string()) - .unwrap_or_else(|| "?".to_string()) + // As the LSN here refers to the one that the compute is started with, + // we display nothing as it is a primary/hot standby compute. + "---".to_string() } }; From dc975d554ab9d05ab1c6c1fdc8baeee9079efa22 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Tue, 25 Feb 2025 21:51:38 +0200 Subject: [PATCH 017/207] Increment getpage histogram in prefetch_lookup (#10965) ## Problem PR https://github.com/neondatabase/neon/pull/10442 added prefetch_lookup function. It changed handling of getpage requests in compute. Before: 1. Lookup in LFC (return if found) 2. Register prefetch buffer 3. Wait prefetch result (increment getpage_hist) Now: 1. Lookup prefetch ring (return if prefetch request is already completed) 2. Lookup in LFC (return if found) 3. Register prefetch buffer 4. Wait prefetch result (increment getpage_hist) So if prefetch result is already available, then get page histogram is not incremented. This caused failures in some test_throughput benchmarks: https://neondb.slack.com/archives/C033RQ5SPDH/p1740425527249499 ## Summary of changes Increment getpage histogram in `prefetch_lookup` Co-authored-by: Konstantin Knizhnik --- pgxn/neon/pagestore_smgr.c | 1 + 1 file changed, 1 insertion(+) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 4a79acd777..091ad555e0 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -967,6 +967,7 @@ prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, n BITMAP_SET(mask, i); hits += 1; + inc_getpage_wait(0); } } pgBufferUsage.prefetch.hits += hits; From 920040e40240774219b6607f1f8ef74478dc4b29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 26 Feb 2025 00:51:37 +0100 Subject: [PATCH 018/207] Update storage components to edition 2024 (#10919) Updates storage components to edition 2024.
We like to stay on the latest edition if possible. There are no functional changes, however some code changes had to be done to accommodate the edition's breaking changes. The PR has two commits: * the first commit updates storage crates to edition 2024 and appeases `cargo clippy` by changing code. I have accidentally run the formatter on some files that had other edits. * the second commit performs a `cargo fmt` I would recommend a closer review of the first commit and a less close review of the second one (as it just runs `cargo fmt`). part of https://github.com/neondatabase/neon/issues/10918 --- libs/pageserver_api/Cargo.toml | 2 +- libs/pageserver_api/src/config.rs | 21 +- libs/pageserver_api/src/controller_api.rs | 12 +- libs/pageserver_api/src/key.rs | 30 +- libs/pageserver_api/src/keyspace.rs | 19 +- libs/pageserver_api/src/models.rs | 48 ++- libs/pageserver_api/src/models/utilization.rs | 14 +- libs/pageserver_api/src/record.rs | 2 +- libs/pageserver_api/src/reltag.rs | 6 +- libs/pageserver_api/src/shard.rs | 10 +- libs/pageserver_api/src/upcall_api.rs | 10 +- libs/pageserver_api/src/value.rs | 7 +- libs/remote_storage/Cargo.toml | 2 +- libs/remote_storage/src/azure_blob.rs | 34 +- libs/remote_storage/src/config.rs | 6 +- libs/remote_storage/src/lib.rs | 58 +-- libs/remote_storage/src/local_fs.rs | 54 +-- libs/remote_storage/src/metrics.rs | 4 +- libs/remote_storage/src/s3_bucket.rs | 79 ++-- libs/remote_storage/src/simulate_failures.rs | 11 +- libs/remote_storage/src/support.rs | 13 +- libs/remote_storage/tests/common/tests.rs | 20 +- libs/remote_storage/tests/test_real_azure.rs | 6 +- libs/remote_storage/tests/test_real_s3.rs | 18 +- libs/safekeeper_api/Cargo.toml | 2 +- libs/safekeeper_api/src/membership.rs | 6 +- libs/safekeeper_api/src/models.rs | 15 +- pageserver/Cargo.toml | 2 +- pageserver/benches/bench_ingest.rs | 30 +- pageserver/benches/bench_layer_map.rs | 20 +- pageserver/benches/bench_walredo.rs | 21 +- pageserver/benches/upload_queue.rs | 6
+- pageserver/compaction/src/helpers.rs | 4 +- .../pagebench/src/util/request_stats.rs | 4 +- pageserver/src/assert_u64_eq_usize.rs | 4 +- pageserver/src/aux_file.rs | 2 +- pageserver/src/basebackup.rs | 34 +- pageserver/src/bin/pageserver.rs | 72 ++-- .../src/bin/test_helper_slow_client_reads.rs | 12 +- pageserver/src/config.rs | 35 +- pageserver/src/consumption_metrics.rs | 25 +- .../src/consumption_metrics/disk_cache.rs | 6 +- pageserver/src/consumption_metrics/metrics.rs | 15 +- .../src/consumption_metrics/metrics/tests.rs | 4 +- pageserver/src/consumption_metrics/upload.rs | 16 +- pageserver/src/controller_upcall_client.rs | 34 +- pageserver/src/deletion_queue.rs | 65 ++-- pageserver/src/deletion_queue/deleter.rs | 15 +- pageserver/src/deletion_queue/list_writer.rs | 16 +- pageserver/src/deletion_queue/validator.rs | 25 +- pageserver/src/disk_usage_eviction_task.rs | 52 +-- pageserver/src/http/routes.rs | 169 ++++----- pageserver/src/import_datadir.rs | 20 +- pageserver/src/l0_flush.rs | 3 +- pageserver/src/lib.rs | 12 +- pageserver/src/metrics.rs | 36 +- pageserver/src/page_cache.rs | 27 +- pageserver/src/page_service.rs | 135 ++++--- pageserver/src/pgdatadir_mapping.rs | 132 +++---- pageserver/src/statvfs.rs | 7 +- pageserver/src/task_mgr.rs | 5 +- pageserver/src/tenant.rs | 311 ++++++++-------- pageserver/src/tenant/blob_io.rs | 20 +- pageserver/src/tenant/block_io.rs | 8 +- pageserver/src/tenant/checks.rs | 6 +- pageserver/src/tenant/config.rs | 14 +- pageserver/src/tenant/disk_btree.rs | 32 +- pageserver/src/tenant/ephemeral_file.rs | 28 +- pageserver/src/tenant/gc_block.rs | 3 +- pageserver/src/tenant/gc_result.rs | 5 +- pageserver/src/tenant/layer_map.rs | 47 +-- .../layer_map/historic_layer_coverage.rs | 3 +- pageserver/src/tenant/metadata.rs | 8 +- pageserver/src/tenant/mgr.rs | 100 ++--- .../src/tenant/remote_timeline_client.rs | 170 ++++----- .../tenant/remote_timeline_client/download.rs | 42 +-- .../tenant/remote_timeline_client/index.rs | 12 +- 
.../tenant/remote_timeline_client/manifest.rs | 3 +- .../tenant/remote_timeline_client/upload.rs | 20 +- pageserver/src/tenant/secondary.rs | 47 +-- pageserver/src/tenant/secondary/downloader.rs | 92 ++--- pageserver/src/tenant/secondary/heatmap.rs | 12 +- .../src/tenant/secondary/heatmap_uploader.rs | 57 ++- pageserver/src/tenant/secondary/scheduler.rs | 17 +- pageserver/src/tenant/size.rs | 21 +- pageserver/src/tenant/storage_layer.rs | 34 +- .../storage_layer/batch_split_writer.rs | 28 +- .../src/tenant/storage_layer/delta_layer.rs | 92 +++-- .../tenant/storage_layer/filter_iterator.rs | 31 +- .../src/tenant/storage_layer/image_layer.rs | 114 +++--- .../tenant/storage_layer/inmemory_layer.rs | 58 +-- .../inmemory_layer/vectored_dio_read.rs | 35 +- pageserver/src/tenant/storage_layer/layer.rs | 36 +- .../src/tenant/storage_layer/layer/tests.rs | 30 +- .../src/tenant/storage_layer/layer_desc.rs | 11 +- .../src/tenant/storage_layer/layer_name.rs | 4 +- .../tenant/storage_layer/merge_iterator.rs | 41 +-- pageserver/src/tenant/tasks.rs | 25 +- pageserver/src/tenant/throttle.rs | 10 +- pageserver/src/tenant/timeline.rs | 347 +++++++++--------- pageserver/src/tenant/timeline/analysis.rs | 3 +- pageserver/src/tenant/timeline/compaction.rs | 153 +++++--- pageserver/src/tenant/timeline/delete.rs | 34 +- .../src/tenant/timeline/detach_ancestor.rs | 39 +- .../src/tenant/timeline/eviction_task.rs | 37 +- pageserver/src/tenant/timeline/handle.rs | 23 +- .../timeline/heatmap_layers_downloader.rs | 3 +- .../src/tenant/timeline/import_pgdata.rs | 8 +- .../src/tenant/timeline/import_pgdata/flow.rs | 56 ++- .../import_pgdata/importbucket_client.rs | 7 +- .../import_pgdata/index_part_format.rs | 3 +- .../timeline/import_pgdata/upcall_api.rs | 5 +- pageserver/src/tenant/timeline/init.rs | 24 +- .../src/tenant/timeline/layer_manager.rs | 37 +- .../src/tenant/timeline/logical_size.rs | 5 +- pageserver/src/tenant/timeline/offload.rs | 4 +- pageserver/src/tenant/timeline/uninit.rs | 23 
+- pageserver/src/tenant/timeline/walreceiver.rs | 15 +- .../walreceiver/connection_manager.rs | 87 ++--- .../walreceiver/walreceiver_connection.rs | 90 +++-- pageserver/src/tenant/upload_queue.rs | 205 ++++++++--- pageserver/src/tenant/vectored_blob_io.rs | 13 +- pageserver/src/utilization.rs | 10 +- pageserver/src/virtual_file.rs | 56 +-- pageserver/src/virtual_file/io_engine.rs | 17 +- .../io_engine/tokio_epoll_uring_ext.rs | 14 +- pageserver/src/virtual_file/open_options.rs | 4 +- .../owned_buffers_io/aligned_buffer/buffer.rs | 10 +- .../aligned_buffer/buffer_mut.rs | 28 +- .../owned_buffers_io/aligned_buffer/raw.rs | 8 +- .../owned_buffers_io/io_buf_ext.rs | 5 +- .../virtual_file/owned_buffers_io/slice.rs | 8 +- .../virtual_file/owned_buffers_io/write.rs | 16 +- .../owned_buffers_io/write/flush.rs | 8 +- pageserver/src/walingest.rs | 89 +++-- pageserver/src/walredo.rs | 30 +- pageserver/src/walredo/apply_neon.rs | 3 +- pageserver/src/walredo/process.rs | 34 +- .../src/walredo/process/no_leak_child.rs | 18 +- safekeeper/Cargo.toml | 2 +- safekeeper/benches/receive_wal.rs | 4 +- safekeeper/src/bin/safekeeper.rs | 57 ++- safekeeper/src/broker.rs | 44 +-- safekeeper/src/control_file.rs | 24 +- safekeeper/src/control_file_upgrade.rs | 31 +- safekeeper/src/copy_timeline.rs | 30 +- safekeeper/src/debug_dump.rs | 32 +- safekeeper/src/handler.rs | 33 +- safekeeper/src/http/mod.rs | 6 +- safekeeper/src/http/routes.rs | 60 ++- safekeeper/src/json_ctrl.rs | 15 +- safekeeper/src/lib.rs | 12 +- safekeeper/src/metrics.rs | 28 +- safekeeper/src/patch_control_file.rs | 3 +- safekeeper/src/pull_timeline.rs | 67 ++-- safekeeper/src/receive_wal.rs | 47 ++- safekeeper/src/recovery.rs | 46 ++- safekeeper/src/safekeeper.rs | 85 +++-- safekeeper/src/send_interpreted_wal.rs | 41 +-- safekeeper/src/send_wal.rs | 62 ++-- safekeeper/src/state.rs | 30 +- safekeeper/src/test_utils.rs | 17 +- safekeeper/src/timeline.rs | 46 +-- safekeeper/src/timeline_eviction.rs | 22 +- 
safekeeper/src/timeline_manager.rs | 49 ++- safekeeper/src/timelines_global_map.rs | 30 +- safekeeper/src/timelines_set.rs | 3 +- safekeeper/src/wal_backup.rs | 29 +- safekeeper/src/wal_backup_partial.rs | 20 +- safekeeper/src/wal_reader_stream.rs | 24 +- safekeeper/src/wal_service.rs | 18 +- safekeeper/src/wal_storage.rs | 37 +- safekeeper/tests/misc_test.rs | 6 +- safekeeper/tests/random_test.rs | 10 +- safekeeper/tests/simple_test.rs | 3 +- safekeeper/tests/walproposer_sim/log.rs | 6 +- .../tests/walproposer_sim/safekeeper.rs | 37 +- .../tests/walproposer_sim/safekeeper_disk.rs | 21 +- .../tests/walproposer_sim/simulation.rs | 31 +- .../tests/walproposer_sim/walproposer_api.rs | 34 +- .../tests/walproposer_sim/walproposer_disk.rs | 3 +- storage_broker/Cargo.toml | 2 +- storage_broker/benches/rps.rs | 8 +- storage_broker/src/bin/storage_broker.rs | 44 +-- storage_broker/src/lib.rs | 18 +- storage_broker/src/metrics.rs | 2 +- storage_controller/Cargo.toml | 2 +- .../src/background_node_operations.rs | 3 +- storage_controller/src/compute_hook.rs | 11 +- storage_controller/src/drain_utils.rs | 23 +- storage_controller/src/heartbeater.rs | 32 +- storage_controller/src/http.rs | 92 ++--- storage_controller/src/id_lock_map.rs | 7 +- storage_controller/src/leadership.rs | 12 +- storage_controller/src/main.rs | 22 +- storage_controller/src/metrics.rs | 13 +- storage_controller/src/node.rs | 22 +- storage_controller/src/pageserver_client.rs | 22 +- storage_controller/src/peer_client.rs | 9 +- storage_controller/src/persistence.rs | 41 +-- .../src/persistence/split_state.rs | 8 +- storage_controller/src/reconciler.rs | 18 +- storage_controller/src/safekeeper.rs | 15 +- storage_controller/src/safekeeper_client.rs | 9 +- storage_controller/src/scheduler.rs | 44 +-- storage_controller/src/service.rs | 319 +++++++++------- .../src/service/chaos_injector.rs | 19 +- .../src/service/context_iterator.rs | 11 +- storage_controller/src/tenant_shard.rs | 124 +++---- 
storage_scrubber/Cargo.toml | 2 +- storage_scrubber/src/checks.rs | 36 +- storage_scrubber/src/cloud_admin_api.rs | 4 +- storage_scrubber/src/find_large_objects.rs | 7 +- storage_scrubber/src/garbage.rs | 42 ++- storage_scrubber/src/lib.rs | 12 +- storage_scrubber/src/main.rs | 38 +- storage_scrubber/src/metadata_stream.rs | 10 +- .../src/pageserver_physical_gc.rs | 17 +- .../src/scan_pageserver_metadata.rs | 15 +- .../src/scan_safekeeper_metadata.rs | 19 +- storage_scrubber/src/tenant_snapshot.rs | 15 +- 221 files changed, 3543 insertions(+), 3611 deletions(-) diff --git a/libs/pageserver_api/Cargo.toml b/libs/pageserver_api/Cargo.toml index 79da05da6c..87dfdfb5ec 100644 --- a/libs/pageserver_api/Cargo.toml +++ b/libs/pageserver_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pageserver_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 5a695c04ed..039cc1319e 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -9,19 +9,18 @@ pub const DEFAULT_PG_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_PG_LISTEN pub const DEFAULT_HTTP_LISTEN_PORT: u16 = 9898; pub const DEFAULT_HTTP_LISTEN_ADDR: &str = formatcp!("127.0.0.1:{DEFAULT_HTTP_LISTEN_PORT}"); +use std::collections::HashMap; +use std::num::{NonZeroU64, NonZeroUsize}; +use std::str::FromStr; +use std::time::Duration; + use postgres_backend::AuthType; use remote_storage::RemoteStorageConfig; use serde_with::serde_as; -use std::{ - collections::HashMap, - num::{NonZeroU64, NonZeroUsize}, - str::FromStr, - time::Duration, -}; -use utils::{logging::LogFormat, postgres_client::PostgresClientProtocol}; +use utils::logging::LogFormat; +use utils::postgres_client::PostgresClientProtocol; -use crate::models::ImageCompressionAlgorithm; -use crate::models::LsnLease; +use crate::models::{ImageCompressionAlgorithm, LsnLease}; // Certain metadata (e.g. 
externally-addressable name, AZ) is delivered // as a separate structure. This information is not neeed by the pageserver @@ -367,10 +366,10 @@ pub struct TenantConfigToml { } pub mod defaults { - use crate::models::ImageCompressionAlgorithm; - pub use storage_broker::DEFAULT_ENDPOINT as BROKER_DEFAULT_ENDPOINT; + use crate::models::ImageCompressionAlgorithm; + pub const DEFAULT_WAIT_LSN_TIMEOUT: &str = "300 s"; pub const DEFAULT_WAL_REDO_TIMEOUT: &str = "60 s"; diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index f94bfab581..2cfe1a85f9 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -9,11 +9,8 @@ use std::time::{Duration, Instant}; use serde::{Deserialize, Serialize}; use utils::id::{NodeId, TenantId}; -use crate::models::PageserverUtilization; -use crate::{ - models::{ShardParameters, TenantConfig}, - shard::{ShardStripeSize, TenantShardId}, -}; +use crate::models::{PageserverUtilization, ShardParameters, TenantConfig}; +use crate::shard::{ShardStripeSize, TenantShardId}; #[derive(Serialize, Deserialize, Debug)] #[serde(deny_unknown_fields)] @@ -354,7 +351,7 @@ impl FromStr for SkSchedulingPolicy { _ => { return Err(anyhow::anyhow!( "Unknown scheduling policy '{s}', try active,pause,decomissioned" - )) + )); } }) } @@ -457,9 +454,10 @@ pub struct SafekeeperSchedulingPolicyRequest { #[cfg(test)] mod test { - use super::*; use serde_json; + use super::*; + /// Check stability of PlacementPolicy's serialization #[test] fn placement_policy_encoding() -> anyhow::Result<()> { diff --git a/libs/pageserver_api/src/key.rs b/libs/pageserver_api/src/key.rs index b88a2e46a1..8836e7ec87 100644 --- a/libs/pageserver_api/src/key.rs +++ b/libs/pageserver_api/src/key.rs @@ -1,11 +1,12 @@ -use anyhow::{bail, Result}; -use byteorder::{ByteOrder, BE}; +use std::fmt; +use std::ops::Range; + +use anyhow::{Result, bail}; +use byteorder::{BE, ByteOrder}; use bytes::Bytes; use 
postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; -use postgres_ffi::Oid; -use postgres_ffi::RepOriginId; +use postgres_ffi::{Oid, RepOriginId}; use serde::{Deserialize, Serialize}; -use std::{fmt, ops::Range}; use utils::const_assert; use crate::reltag::{BlockNumber, RelTag, SlruKind}; @@ -954,25 +955,22 @@ impl std::str::FromStr for Key { mod tests { use std::str::FromStr; - use crate::key::is_metadata_key_slice; - use crate::key::Key; - - use rand::Rng; - use rand::SeedableRng; + use rand::{Rng, SeedableRng}; use super::AUX_KEY_PREFIX; + use crate::key::{Key, is_metadata_key_slice}; #[test] fn display_fromstr_bijection() { let mut rng = rand::rngs::StdRng::seed_from_u64(42); let key = Key { - field1: rng.gen(), - field2: rng.gen(), - field3: rng.gen(), - field4: rng.gen(), - field5: rng.gen(), - field6: rng.gen(), + field1: rng.r#gen(), + field2: rng.r#gen(), + field3: rng.r#gen(), + field4: rng.r#gen(), + field5: rng.r#gen(), + field6: rng.r#gen(), }; assert_eq!(key, Key::from_str(&format!("{key}")).unwrap()); diff --git a/libs/pageserver_api/src/keyspace.rs b/libs/pageserver_api/src/keyspace.rs index c55b9e9484..e505f23e49 100644 --- a/libs/pageserver_api/src/keyspace.rs +++ b/libs/pageserver_api/src/keyspace.rs @@ -1,11 +1,10 @@ -use postgres_ffi::BLCKSZ; use std::ops::Range; -use crate::{ - key::Key, - shard::{ShardCount, ShardIdentity}, -}; use itertools::Itertools; +use postgres_ffi::BLCKSZ; + +use crate::key::Key; +use crate::shard::{ShardCount, ShardIdentity}; /// /// Represents a set of Keys, in a compact form. @@ -609,15 +608,13 @@ pub fn singleton_range(key: Key) -> Range { #[cfg(test)] mod tests { + use std::fmt::Write; + use rand::{RngCore, SeedableRng}; - use crate::{ - models::ShardParameters, - shard::{ShardCount, ShardNumber}, - }; - use super::*; - use std::fmt::Write; + use crate::models::ShardParameters; + use crate::shard::{ShardCount, ShardNumber}; // Helper function to create a key range. 
// diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 1164048229..ea565e7769 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -2,38 +2,30 @@ pub mod detach_ancestor; pub mod partitioning; pub mod utilization; -#[cfg(feature = "testing")] -use camino::Utf8PathBuf; -pub use utilization::PageserverUtilization; - use core::ops::Range; -use std::{ - collections::HashMap, - fmt::Display, - io::{BufRead, Read}, - num::{NonZeroU32, NonZeroU64, NonZeroUsize}, - str::FromStr, - time::{Duration, SystemTime}, -}; +use std::collections::HashMap; +use std::fmt::Display; +use std::io::{BufRead, Read}; +use std::num::{NonZeroU32, NonZeroU64, NonZeroUsize}; +use std::str::FromStr; +use std::time::{Duration, SystemTime}; use byteorder::{BigEndian, ReadBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +#[cfg(feature = "testing")] +use camino::Utf8PathBuf; use postgres_ffi::BLCKSZ; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_with::serde_as; -use utils::{ - completion, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, - postgres_client::PostgresClientProtocol, - serde_system_time, -}; +pub use utilization::PageserverUtilization; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::postgres_client::PostgresClientProtocol; +use utils::{completion, serde_system_time}; -use crate::{ - key::{CompactKey, Key}, - reltag::RelTag, - shard::{ShardCount, ShardStripeSize, TenantShardId}, -}; -use bytes::{Buf, BufMut, Bytes, BytesMut}; +use crate::key::{CompactKey, Key}; +use crate::reltag::RelTag; +use crate::shard::{ShardCount, ShardStripeSize, TenantShardId}; /// The state of a tenant in this pageserver. 
/// @@ -332,7 +324,8 @@ pub struct ImportPgdataIdempotencyKey(pub String); impl ImportPgdataIdempotencyKey { pub fn random() -> Self { - use rand::{distributions::Alphanumeric, Rng}; + use rand::Rng; + use rand::distributions::Alphanumeric; Self( rand::thread_rng() .sample_iter(&Alphanumeric) @@ -2288,9 +2281,10 @@ impl Default for PageTraceEvent { #[cfg(test)] mod tests { - use serde_json::json; use std::str::FromStr; + use serde_json::json; + use super::*; #[test] diff --git a/libs/pageserver_api/src/models/utilization.rs b/libs/pageserver_api/src/models/utilization.rs index 641aa51989..69c240ff3c 100644 --- a/libs/pageserver_api/src/models/utilization.rs +++ b/libs/pageserver_api/src/models/utilization.rs @@ -1,5 +1,7 @@ use std::time::SystemTime; -use utils::{serde_percent::Percent, serde_system_time}; + +use utils::serde_percent::Percent; +use utils::serde_system_time; /// Pageserver current utilization and scoring for how good candidate the pageserver would be for /// the next tenant. @@ -131,12 +133,12 @@ impl PageserverUtilization { /// Test helper pub mod test_utilization { - use super::PageserverUtilization; use std::time::SystemTime; - use utils::{ - serde_percent::Percent, - serde_system_time::{self}, - }; + + use utils::serde_percent::Percent; + use utils::serde_system_time::{self}; + + use super::PageserverUtilization; // Parameters of the imaginary node used for test utilization instances const TEST_DISK_SIZE: u64 = 1024 * 1024 * 1024 * 1024; diff --git a/libs/pageserver_api/src/record.rs b/libs/pageserver_api/src/record.rs index bb62b35d36..fda504a26e 100644 --- a/libs/pageserver_api/src/record.rs +++ b/libs/pageserver_api/src/record.rs @@ -1,7 +1,7 @@ //! This module defines the WAL record format used within the pageserver. 
use bytes::Bytes; -use postgres_ffi::walrecord::{describe_postgres_wal_record, MultiXactMember}; +use postgres_ffi::walrecord::{MultiXactMember, describe_postgres_wal_record}; use postgres_ffi::{MultiXactId, MultiXactOffset, TimestampTz, TransactionId}; use serde::{Deserialize, Serialize}; use utils::bin_ser::DeserializeError; diff --git a/libs/pageserver_api/src/reltag.rs b/libs/pageserver_api/src/reltag.rs index 09d1fae221..473a44dbf9 100644 --- a/libs/pageserver_api/src/reltag.rs +++ b/libs/pageserver_api/src/reltag.rs @@ -1,10 +1,10 @@ -use serde::{Deserialize, Serialize}; use std::cmp::Ordering; use std::fmt; -use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID; -use postgres_ffi::relfile_utils::{forkname_to_number, forknumber_to_name, MAIN_FORKNUM}; use postgres_ffi::Oid; +use postgres_ffi::pg_constants::GLOBALTABLESPACE_OID; +use postgres_ffi::relfile_utils::{MAIN_FORKNUM, forkname_to_number, forknumber_to_name}; +use serde::{Deserialize, Serialize}; /// /// Relation data file segment id throughout the Postgres cluster. diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index e03df02afb..eca04b1f3d 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -33,12 +33,13 @@ use std::hash::{Hash, Hasher}; -use crate::{key::Key, models::ShardParameters}; +#[doc(inline)] +pub use ::utils::shard::*; use postgres_ffi::relfile_utils::INIT_FORKNUM; use serde::{Deserialize, Serialize}; -#[doc(inline)] -pub use ::utils::shard::*; +use crate::key::Key; +use crate::models::ShardParameters; /// The ShardIdentity contains enough information to map a [`Key`] to a [`ShardNumber`], /// and to check whether that [`ShardNumber`] is the same as the current shard. 
@@ -337,7 +338,8 @@ pub fn describe( mod tests { use std::str::FromStr; - use utils::{id::TenantId, Hex}; + use utils::Hex; + use utils::id::TenantId; use super::*; diff --git a/libs/pageserver_api/src/upcall_api.rs b/libs/pageserver_api/src/upcall_api.rs index 2e88836bd0..647d01c3c2 100644 --- a/libs/pageserver_api/src/upcall_api.rs +++ b/libs/pageserver_api/src/upcall_api.rs @@ -6,9 +6,9 @@ use serde::{Deserialize, Serialize}; use utils::id::NodeId; -use crate::{ - controller_api::NodeRegisterRequest, models::LocationConfigMode, shard::TenantShardId, -}; +use crate::controller_api::NodeRegisterRequest; +use crate::models::LocationConfigMode; +use crate::shard::TenantShardId; /// Upcall message sent by the pageserver to the configured `control_plane_api` on /// startup. @@ -30,7 +30,7 @@ fn default_mode() -> LocationConfigMode { pub struct ReAttachResponseTenant { pub id: TenantShardId, /// Mandatory if LocationConfigMode is None or set to an Attached* mode - pub gen: Option, + pub r#gen: Option, /// Default value only for backward compat: this field should be set #[serde(default = "default_mode")] @@ -44,7 +44,7 @@ pub struct ReAttachResponse { #[derive(Serialize, Deserialize)] pub struct ValidateRequestTenant { pub id: TenantShardId, - pub gen: u32, + pub r#gen: u32, } #[derive(Serialize, Deserialize)] diff --git a/libs/pageserver_api/src/value.rs b/libs/pageserver_api/src/value.rs index 1f8ed30a9a..883d903ff3 100644 --- a/libs/pageserver_api/src/value.rs +++ b/libs/pageserver_api/src/value.rs @@ -7,10 +7,11 @@ //! Note that the [`Value`] type is used for the permananent storage format, so any //! changes to it must be backwards compatible. 
-use crate::record::NeonWalRecord; use bytes::Bytes; use serde::{Deserialize, Serialize}; +use crate::record::NeonWalRecord; + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum Value { /// An Image value contains a full copy of the value @@ -83,11 +84,11 @@ impl ValueBytes { #[cfg(test)] mod test { - use super::*; - use bytes::Bytes; use utils::bin_ser::BeSer; + use super::*; + macro_rules! roundtrip { ($orig:expr, $expected:expr) => {{ let orig: Value = $orig; diff --git a/libs/remote_storage/Cargo.toml b/libs/remote_storage/Cargo.toml index 33fa6e89f5..7bdf340f74 100644 --- a/libs/remote_storage/Cargo.toml +++ b/libs/remote_storage/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "remote_storage" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 9027a8bf55..dee61a410d 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -2,33 +2,26 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::env; use std::fmt::Display; -use std::io; use std::num::NonZeroU32; use std::pin::Pin; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::SystemTime; +use std::time::{Duration, SystemTime}; +use std::{env, io}; -use super::REMOTE_STORAGE_PREFIX_SEPARATOR; -use anyhow::Context; -use anyhow::Result; +use anyhow::{Context, Result}; use azure_core::request_options::{IfMatchCondition, MaxResults, Metadata, Range}; -use azure_core::HttpClient; -use azure_core::TransportOptions; -use azure_core::{Continuable, RetryOptions}; +use azure_core::{Continuable, HttpClient, RetryOptions, TransportOptions}; use azure_storage::StorageCredentials; use azure_storage_blobs::blob::CopyStatus; -use azure_storage_blobs::prelude::ClientBuilder; -use azure_storage_blobs::{blob::operations::GetBlobBuilder, prelude::ContainerClient}; +use 
azure_storage_blobs::blob::operations::GetBlobBuilder; +use azure_storage_blobs::prelude::{ClientBuilder, ContainerClient}; use bytes::Bytes; +use futures::FutureExt; use futures::future::Either; use futures::stream::Stream; -use futures::FutureExt; -use futures_util::StreamExt; -use futures_util::TryStreamExt; +use futures_util::{StreamExt, TryStreamExt}; use http_types::{StatusCode, Url}; use scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; @@ -36,12 +29,13 @@ use tracing::debug; use utils::backoff; use utils::backoff::exponential_backoff_duration_seconds; -use crate::metrics::{start_measuring_requests, AttemptOutcome, RequestKind}; -use crate::DownloadKind; +use super::REMOTE_STORAGE_PREFIX_SEPARATOR; +use crate::config::AzureConfig; +use crate::error::Cancelled; +use crate::metrics::{AttemptOutcome, RequestKind, start_measuring_requests}; use crate::{ - config::AzureConfig, error::Cancelled, ConcurrencyLimiter, Download, DownloadError, - DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, RemoteStorage, StorageMetadata, - TimeTravelError, TimeoutOrCancel, + ConcurrencyLimiter, Download, DownloadError, DownloadKind, DownloadOpts, Listing, ListingMode, + ListingObject, RemotePath, RemoteStorage, StorageMetadata, TimeTravelError, TimeoutOrCancel, }; pub struct AzureBlobStorage { diff --git a/libs/remote_storage/src/config.rs b/libs/remote_storage/src/config.rs index ff34158c9c..52978be5b4 100644 --- a/libs/remote_storage/src/config.rs +++ b/libs/remote_storage/src/config.rs @@ -1,8 +1,10 @@ -use std::{fmt::Debug, num::NonZeroUsize, str::FromStr, time::Duration}; +use std::fmt::Debug; +use std::num::NonZeroUsize; +use std::str::FromStr; +use std::time::Duration; use aws_sdk_s3::types::StorageClass; use camino::Utf8PathBuf; - use serde::{Deserialize, Serialize}; use crate::{ diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 69b522d63e..6eb5570d9b 100644 --- a/libs/remote_storage/src/lib.rs +++ 
b/libs/remote_storage/src/lib.rs @@ -18,40 +18,35 @@ mod s3_bucket; mod simulate_failures; mod support; -use std::{ - collections::HashMap, - fmt::Debug, - num::NonZeroU32, - ops::Bound, - pin::{pin, Pin}, - sync::Arc, - time::SystemTime, -}; +use std::collections::HashMap; +use std::fmt::Debug; +use std::num::NonZeroU32; +use std::ops::Bound; +use std::pin::{Pin, pin}; +use std::sync::Arc; +use std::time::SystemTime; use anyhow::Context; -use camino::{Utf8Path, Utf8PathBuf}; - +/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here. +pub use azure_core::Etag; use bytes::Bytes; -use futures::{stream::Stream, StreamExt}; +use camino::{Utf8Path, Utf8PathBuf}; +pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel}; +use futures::StreamExt; +use futures::stream::Stream; use itertools::Itertools as _; +use s3_bucket::RequestKind; use serde::{Deserialize, Serialize}; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::info; -pub use self::{ - azure_blob::AzureBlobStorage, local_fs::LocalFs, s3_bucket::S3Bucket, - simulate_failures::UnreliableWrapper, -}; -use s3_bucket::RequestKind; - +pub use self::azure_blob::AzureBlobStorage; +pub use self::local_fs::LocalFs; +pub use self::s3_bucket::S3Bucket; +pub use self::simulate_failures::UnreliableWrapper; pub use crate::config::{AzureConfig, RemoteStorageConfig, RemoteStorageKind, S3Config}; -/// Azure SDK's ETag type is a simple String wrapper: we use this internally instead of repeating it here. 
-pub use azure_core::Etag; - -pub use error::{DownloadError, TimeTravelError, TimeoutOrCancel}; - /// Default concurrency limit for S3 operations /// /// Currently, sync happens with AWS S3, that has two limits on requests per second: @@ -640,8 +635,13 @@ impl GenericRemoteStorage { let profile = std::env::var("AWS_PROFILE").unwrap_or_else(|_| "".into()); let access_key_id = std::env::var("AWS_ACCESS_KEY_ID").unwrap_or_else(|_| "".into()); - info!("Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}", - s3_config.bucket_name, s3_config.bucket_region, s3_config.prefix_in_bucket, s3_config.endpoint); + info!( + "Using s3 bucket '{}' in region '{}' as a remote storage, prefix in bucket: '{:?}', bucket endpoint: '{:?}', profile: {profile}, access_key_id: {access_key_id}", + s3_config.bucket_name, + s3_config.bucket_region, + s3_config.prefix_in_bucket, + s3_config.endpoint + ); Self::AwsS3(Arc::new(S3Bucket::new(s3_config, timeout).await?)) } RemoteStorageKind::AzureContainer(azure_config) => { @@ -649,8 +649,12 @@ impl GenericRemoteStorage { .storage_account .as_deref() .unwrap_or(""); - info!("Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'", - azure_config.container_name, azure_config.container_region, azure_config.prefix_in_container); + info!( + "Using azure container '{}' in account '{storage_account}' in region '{}' as a remote storage, prefix in container: '{:?}'", + azure_config.container_name, + azure_config.container_region, + azure_config.prefix_in_container + ); Self::AzureBlob(Arc::new(AzureBlobStorage::new( azure_config, timeout, diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index a8b00173ba..f03d6ac8ee 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -4,31 +4,26 @@ //! 
This storage used in tests, but can also be used in cases when a certain persistent //! volume is mounted to the local FS. -use std::{ - collections::HashSet, - io::ErrorKind, - num::NonZeroU32, - time::{Duration, SystemTime, UNIX_EPOCH}, -}; +use std::collections::HashSet; +use std::io::ErrorKind; +use std::num::NonZeroU32; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use anyhow::{bail, ensure, Context}; +use anyhow::{Context, bail, ensure}; use bytes::Bytes; use camino::{Utf8Path, Utf8PathBuf}; use futures::stream::Stream; -use tokio::{ - fs, - io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}, -}; -use tokio_util::{io::ReaderStream, sync::CancellationToken}; +use tokio::fs; +use tokio::io::{self, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; +use tokio_util::io::ReaderStream; +use tokio_util::sync::CancellationToken; use utils::crashsafe::path_with_suffix_extension; -use crate::{ - Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, RemotePath, - TimeTravelError, TimeoutOrCancel, REMOTE_STORAGE_PREFIX_SEPARATOR, -}; - use super::{RemoteStorage, StorageMetadata}; -use crate::Etag; +use crate::{ + Download, DownloadError, DownloadOpts, Etag, Listing, ListingMode, ListingObject, + REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, TimeTravelError, TimeoutOrCancel, +}; const LOCAL_FS_TEMP_FILE_SUFFIX: &str = "___temp"; @@ -91,7 +86,8 @@ impl LocalFs { #[cfg(test)] async fn list_all(&self) -> anyhow::Result> { - use std::{future::Future, pin::Pin}; + use std::future::Future; + use std::pin::Pin; fn get_all_files<'a, P>( directory_path: P, ) -> Pin>> + Send + Sync + 'a>> @@ -284,7 +280,9 @@ impl LocalFs { })?; if bytes_read < from_size_bytes { - bail!("Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes"); + bail!( + "Provided stream was shorter than expected: {bytes_read} vs {from_size_bytes} bytes" + ); } // Check if there is any extra data after the given size. 
let mut from = buffer_to_read.into_inner(); @@ -642,10 +640,13 @@ fn mock_etag(meta: &std::fs::Metadata) -> Etag { #[cfg(test)] mod fs_tests { - use super::*; + use std::collections::HashMap; + use std::io::Write; + use std::ops::Bound; use camino_tempfile::tempdir; - use std::{collections::HashMap, io::Write, ops::Bound}; + + use super::*; async fn read_and_check_metadata( storage: &LocalFs, @@ -736,9 +737,14 @@ mod fs_tests { ); let non_existing_path = RemotePath::new(Utf8Path::new("somewhere/else"))?; - match storage.download(&non_existing_path, &DownloadOpts::default(), &cancel).await { + match storage + .download(&non_existing_path, &DownloadOpts::default(), &cancel) + .await + { Err(DownloadError::NotFound) => {} // Should get NotFound for non existing keys - other => panic!("Should get a NotFound error when downloading non-existing storage files, but got: {other:?}"), + other => panic!( + "Should get a NotFound error when downloading non-existing storage files, but got: {other:?}" + ), } Ok(()) } diff --git a/libs/remote_storage/src/metrics.rs b/libs/remote_storage/src/metrics.rs index 48c121fbc8..81e68e9a29 100644 --- a/libs/remote_storage/src/metrics.rs +++ b/libs/remote_storage/src/metrics.rs @@ -1,5 +1,5 @@ use metrics::{ - register_histogram_vec, register_int_counter, register_int_counter_vec, Histogram, IntCounter, + Histogram, IntCounter, register_histogram_vec, register_int_counter, register_int_counter_vec, }; use once_cell::sync::Lazy; @@ -16,8 +16,8 @@ pub(crate) enum RequestKind { Head = 6, } -use scopeguard::ScopeGuard; use RequestKind::*; +use scopeguard::ScopeGuard; impl RequestKind { const fn as_str(&self) -> &'static str { diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index d3f19f0b11..ba7ce9e1e7 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -4,56 +4,50 @@ //! allowing multiple api users to independently work with the same S3 bucket, if //! 
their bucket prefixes are both specified and different. -use std::{ - borrow::Cow, - collections::HashMap, - num::NonZeroU32, - pin::Pin, - sync::Arc, - task::{Context, Poll}, - time::{Duration, SystemTime}, -}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::num::NonZeroU32; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{Context, Poll}; +use std::time::{Duration, SystemTime}; -use anyhow::{anyhow, Context as _}; -use aws_config::{ - default_provider::credentials::DefaultCredentialsChain, - retry::{RetryConfigBuilder, RetryMode}, - BehaviorVersion, -}; -use aws_sdk_s3::{ - config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep}, - error::SdkError, - operation::{get_object::GetObjectError, head_object::HeadObjectError}, - types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass}, - Client, -}; +use anyhow::{Context as _, anyhow}; +use aws_config::BehaviorVersion; +use aws_config::default_provider::credentials::DefaultCredentialsChain; +use aws_config::retry::{RetryConfigBuilder, RetryMode}; +use aws_sdk_s3::Client; +use aws_sdk_s3::config::{AsyncSleep, IdentityCache, Region, SharedAsyncSleep}; +use aws_sdk_s3::error::SdkError; +use aws_sdk_s3::operation::get_object::GetObjectError; +use aws_sdk_s3::operation::head_object::HeadObjectError; +use aws_sdk_s3::types::{Delete, DeleteMarkerEntry, ObjectIdentifier, ObjectVersion, StorageClass}; use aws_smithy_async::rt::sleep::TokioSleep; -use http_body_util::StreamBody; -use http_types::StatusCode; - -use aws_smithy_types::{body::SdkBody, DateTime}; -use aws_smithy_types::{byte_stream::ByteStream, date_time::ConversionError}; +use aws_smithy_types::DateTime; +use aws_smithy_types::body::SdkBody; +use aws_smithy_types::byte_stream::ByteStream; +use aws_smithy_types::date_time::ConversionError; use bytes::Bytes; use futures::stream::Stream; use futures_util::StreamExt; +use http_body_util::StreamBody; +use http_types::StatusCode; use hyper::body::Frame; use 
scopeguard::ScopeGuard; use tokio_util::sync::CancellationToken; use utils::backoff; use super::StorageMetadata; -use crate::{ - config::S3Config, - error::Cancelled, - metrics::{start_counting_cancelled_wait, start_measuring_requests}, - support::PermitCarrying, - ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, - RemotePath, RemoteStorage, TimeTravelError, TimeoutOrCancel, MAX_KEYS_PER_DELETE_S3, - REMOTE_STORAGE_PREFIX_SEPARATOR, -}; - -use crate::metrics::AttemptOutcome; +use crate::config::S3Config; +use crate::error::Cancelled; pub(super) use crate::metrics::RequestKind; +use crate::metrics::{AttemptOutcome, start_counting_cancelled_wait, start_measuring_requests}; +use crate::support::PermitCarrying; +use crate::{ + ConcurrencyLimiter, Download, DownloadError, DownloadOpts, Listing, ListingMode, ListingObject, + MAX_KEYS_PER_DELETE_S3, REMOTE_STORAGE_PREFIX_SEPARATOR, RemotePath, RemoteStorage, + TimeTravelError, TimeoutOrCancel, +}; /// AWS S3 storage. pub struct S3Bucket { @@ -958,8 +952,10 @@ impl RemoteStorage for S3Bucket { version_id, key, .. 
} = &vd; if version_id == "null" { - return Err(TimeTravelError::Other(anyhow!("Received ListVersions response for key={key} with version_id='null', \ - indicating either disabled versioning, or legacy objects with null version id values"))); + return Err(TimeTravelError::Other(anyhow!( + "Received ListVersions response for key={key} with version_id='null', \ + indicating either disabled versioning, or legacy objects with null version id values" + ))); } tracing::trace!( "Parsing version key={key} version_id={version_id} kind={:?}", @@ -1126,9 +1122,10 @@ impl VerOrDelete { #[cfg(test)] mod tests { - use camino::Utf8Path; use std::num::NonZeroUsize; + use camino::Utf8Path; + use crate::{RemotePath, S3Bucket, S3Config}; #[tokio::test] diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs index 63c24beb51..f56be873c4 100644 --- a/libs/remote_storage/src/simulate_failures.rs +++ b/libs/remote_storage/src/simulate_failures.rs @@ -1,14 +1,15 @@ //! This module provides a wrapper around a real RemoteStorage implementation that //! causes the first N attempts at each upload or download operatio to fail. For //! testing purposes. 
-use bytes::Bytes; -use futures::stream::Stream; -use futures::StreamExt; use std::collections::HashMap; +use std::collections::hash_map::Entry; use std::num::NonZeroU32; -use std::sync::Mutex; +use std::sync::{Arc, Mutex}; use std::time::SystemTime; -use std::{collections::hash_map::Entry, sync::Arc}; + +use bytes::Bytes; +use futures::StreamExt; +use futures::stream::Stream; use tokio_util::sync::CancellationToken; use crate::{ diff --git a/libs/remote_storage/src/support.rs b/libs/remote_storage/src/support.rs index 1ed9ed9305..07da38cf77 100644 --- a/libs/remote_storage/src/support.rs +++ b/libs/remote_storage/src/support.rs @@ -1,9 +1,7 @@ -use std::{ - future::Future, - pin::Pin, - task::{Context, Poll}, - time::Duration, -}; +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; +use std::time::Duration; use bytes::Bytes; use futures_util::Stream; @@ -114,9 +112,10 @@ pub(crate) fn cancel_or_timeout( #[cfg(test)] mod tests { + use futures::stream::StreamExt; + use super::*; use crate::DownloadError; - use futures::stream::StreamExt; #[tokio::test(start_paused = true)] async fn cancelled_download_stream() { diff --git a/libs/remote_storage/tests/common/tests.rs b/libs/remote_storage/tests/common/tests.rs index d5da1d48e9..6a78ddc01e 100644 --- a/libs/remote_storage/tests/common/tests.rs +++ b/libs/remote_storage/tests/common/tests.rs @@ -1,19 +1,20 @@ +use std::collections::HashSet; +use std::num::NonZeroU32; +use std::ops::Bound; +use std::sync::Arc; + use anyhow::Context; use camino::Utf8Path; use futures::StreamExt; use remote_storage::{DownloadError, DownloadOpts, ListingMode, ListingObject, RemotePath}; -use std::ops::Bound; -use std::sync::Arc; -use std::{collections::HashSet, num::NonZeroU32}; use test_context::test_context; use tokio_util::sync::CancellationToken; use tracing::debug; -use crate::common::{download_to_vec, upload_stream, wrap_stream}; - use super::{ MaybeEnabledStorage, MaybeEnabledStorageWithSimpleTestBlobs, 
MaybeEnabledStorageWithTestBlobs, }; +use crate::common::{download_to_vec, upload_stream, wrap_stream}; /// Tests that S3 client can list all prefixes, even if the response come paginated and requires multiple S3 queries. /// Uses real S3 and requires [`ENABLE_REAL_S3_REMOTE_STORAGE_ENV_VAR_NAME`] and related S3 cred env vars specified. @@ -62,7 +63,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .into_iter() .collect::>(); assert_eq!( - root_remote_prefixes, HashSet::from([base_prefix.clone()]), + root_remote_prefixes, + HashSet::from([base_prefix.clone()]), "remote storage root prefixes list mismatches with the uploads. Returned prefixes: {root_remote_prefixes:?}" ); @@ -84,7 +86,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .difference(&nested_remote_prefixes) .collect::>(); assert_eq!( - remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0, + remote_only_prefixes.len() + missing_uploaded_prefixes.len(), + 0, "remote storage nested prefixes list mismatches with the uploads. Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}", ); @@ -119,7 +122,8 @@ async fn pagination_should_work(ctx: &mut MaybeEnabledStorageWithTestBlobs) -> a .difference(&nested_remote_prefixes_combined) .collect::>(); assert_eq!( - remote_only_prefixes.len() + missing_uploaded_prefixes.len(), 0, + remote_only_prefixes.len() + missing_uploaded_prefixes.len(), + 0, "remote storage nested prefixes list mismatches with the uploads. 
Remote only prefixes: {remote_only_prefixes:?}, missing uploaded prefixes: {missing_uploaded_prefixes:?}", ); diff --git a/libs/remote_storage/tests/test_real_azure.rs b/libs/remote_storage/tests/test_real_azure.rs index 15004dbf83..31c9ca3200 100644 --- a/libs/remote_storage/tests/test_real_azure.rs +++ b/libs/remote_storage/tests/test_real_azure.rs @@ -1,9 +1,9 @@ +use std::collections::HashSet; use std::env; use std::num::NonZeroUsize; use std::ops::ControlFlow; use std::sync::Arc; -use std::time::UNIX_EPOCH; -use std::{collections::HashSet, time::Duration}; +use std::time::{Duration, UNIX_EPOCH}; use anyhow::Context; use remote_storage::{ @@ -208,7 +208,7 @@ async fn create_azure_client( .as_millis(); // because nanos can be the same for two threads so can millis, add randomness - let random = rand::thread_rng().gen::(); + let random = rand::thread_rng().r#gen::(); let remote_storage_config = RemoteStorageConfig { storage: RemoteStorageKind::AzureContainer(AzureConfig { diff --git a/libs/remote_storage/tests/test_real_s3.rs b/libs/remote_storage/tests/test_real_s3.rs index e60ec18c93..6996bb27ae 100644 --- a/libs/remote_storage/tests/test_real_s3.rs +++ b/libs/remote_storage/tests/test_real_s3.rs @@ -1,13 +1,12 @@ +use std::collections::HashSet; use std::env; use std::fmt::{Debug, Display}; use std::future::Future; use std::num::NonZeroUsize; use std::ops::ControlFlow; use std::sync::Arc; -use std::time::{Duration, UNIX_EPOCH}; -use std::{collections::HashSet, time::SystemTime}; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use crate::common::{download_to_vec, upload_stream}; use anyhow::Context; use camino::Utf8Path; use futures_util::StreamExt; @@ -15,12 +14,13 @@ use remote_storage::{ DownloadError, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config, }; -use test_context::test_context; -use test_context::AsyncTestContext; +use test_context::{AsyncTestContext, test_context}; use 
tokio::io::AsyncBufReadExt; use tokio_util::sync::CancellationToken; use tracing::info; +use crate::common::{download_to_vec, upload_stream}; + mod common; #[path = "common/tests.rs"] @@ -128,8 +128,10 @@ async fn s3_time_travel_recovery_works(ctx: &mut MaybeEnabledStorage) -> anyhow: let t0_hwt = t0 + half_wt; let t1_hwt = t1 - half_wt; if !(t0_hwt..=t1_hwt).contains(&last_modified) { - panic!("last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \ - This likely means a large lock discrepancy between S3 and the local clock."); + panic!( + "last_modified={last_modified:?} is not between t0_hwt={t0_hwt:?} and t1_hwt={t1_hwt:?}. \ + This likely means a large lock discrepancy between S3 and the local clock." + ); } } @@ -383,7 +385,7 @@ async fn create_s3_client( .as_millis(); // because nanos can be the same for two threads so can millis, add randomness - let random = rand::thread_rng().gen::(); + let random = rand::thread_rng().r#gen::(); let remote_storage_config = RemoteStorageConfig { storage: RemoteStorageKind::AwsS3(S3Config { diff --git a/libs/safekeeper_api/Cargo.toml b/libs/safekeeper_api/Cargo.toml index 6b72ace019..d9d080e8fe 100644 --- a/libs/safekeeper_api/Cargo.toml +++ b/libs/safekeeper_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "safekeeper_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs index 2f20ec5f94..4ccdd491b0 100644 --- a/libs/safekeeper_api/src/membership.rs +++ b/libs/safekeeper_api/src/membership.rs @@ -2,7 +2,8 @@ //! rfcs/035-safekeeper-dynamic-membership-change.md //! for details. 
-use std::{collections::HashSet, fmt::Display}; +use std::collections::HashSet; +use std::fmt::Display; use anyhow; use anyhow::bail; @@ -148,9 +149,10 @@ impl Display for Configuration { #[cfg(test)] mod tests { - use super::{MemberSet, SafekeeperId}; use utils::id::NodeId; + use super::{MemberSet, SafekeeperId}; + #[test] fn test_member_set() { let mut members = MemberSet::empty(); diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index 41ccdaa428..2f2aeaa429 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -1,18 +1,17 @@ //! Types used in safekeeper http API. Many of them are also reused internally. +use std::net::SocketAddr; + use pageserver_api::shard::ShardIdentity; use postgres_ffi::TimestampTz; use serde::{Deserialize, Serialize}; -use std::net::SocketAddr; use tokio::time::Instant; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; -use utils::{ - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, - pageserver_feedback::PageserverFeedback, -}; - -use crate::{membership::Configuration, ServerInfo, Term}; +use crate::membership::Configuration; +use crate::{ServerInfo, Term}; #[derive(Debug, Serialize)] pub struct SafekeeperStatus { diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 9d4463d595..7330856be4 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pageserver" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/pageserver/benches/bench_ingest.rs b/pageserver/benches/bench_ingest.rs index b67a9cc479..b1103948d6 100644 --- a/pageserver/benches/bench_ingest.rs +++ b/pageserver/benches/bench_ingest.rs @@ -1,22 +1,20 @@ -use std::{env, num::NonZeroUsize}; +use std::env; +use std::num::NonZeroUsize; use bytes::Bytes; use camino::Utf8PathBuf; -use 
criterion::{criterion_group, criterion_main, Criterion}; -use pageserver::{ - config::PageServerConf, - context::{DownloadBehavior, RequestContext}, - l0_flush::{L0FlushConfig, L0FlushGlobalState}, - page_cache, - task_mgr::TaskKind, - tenant::storage_layer::InMemoryLayer, - virtual_file, -}; -use pageserver_api::{key::Key, shard::TenantShardId, value::Value}; -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, -}; +use criterion::{Criterion, criterion_group, criterion_main}; +use pageserver::config::PageServerConf; +use pageserver::context::{DownloadBehavior, RequestContext}; +use pageserver::l0_flush::{L0FlushConfig, L0FlushGlobalState}; +use pageserver::task_mgr::TaskKind; +use pageserver::tenant::storage_layer::InMemoryLayer; +use pageserver::{page_cache, virtual_file}; +use pageserver_api::key::Key; +use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; use wal_decoder::serialized_batch::SerializedValueBatch; // A very cheap hash for generating non-sequential keys. 
diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 5c5b52db44..e11af49449 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -1,23 +1,21 @@ -use criterion::measurement::WallTime; -use pageserver::keyspace::{KeyPartitioning, KeySpace}; -use pageserver::tenant::layer_map::LayerMap; -use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::storage_layer::PersistentLayerDesc; -use pageserver_api::key::Key; -use pageserver_api::shard::TenantShardId; -use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use std::cmp::{max, min}; use std::fs::File; use std::io::{BufRead, BufReader}; use std::path::PathBuf; use std::str::FromStr; use std::time::Instant; + +use criterion::measurement::WallTime; +use criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main}; +use pageserver::keyspace::{KeyPartitioning, KeySpace}; +use pageserver::tenant::layer_map::LayerMap; +use pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc}; +use pageserver_api::key::Key; +use pageserver_api::shard::TenantShardId; +use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use utils::id::{TenantId, TimelineId}; - use utils::lsn::Lsn; -use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion}; - fn fixture_path(relative: &str) -> PathBuf { PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative) } diff --git a/pageserver/benches/bench_walredo.rs b/pageserver/benches/bench_walredo.rs index d3551b56e1..77b3f90b3e 100644 --- a/pageserver/benches/bench_walredo.rs +++ b/pageserver/benches/bench_walredo.rs @@ -56,20 +56,23 @@ //! medium/128 time: [10.412 ms 10.574 ms 10.718 ms] //! 
``` +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; use bytes::{Buf, Bytes}; use criterion::{BenchmarkId, Criterion}; use once_cell::sync::Lazy; -use pageserver::{config::PageServerConf, walredo::PostgresRedoManager}; +use pageserver::config::PageServerConf; +use pageserver::walredo::PostgresRedoManager; +use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; -use pageserver_api::{key::Key, shard::TenantShardId}; -use std::{ - future::Future, - sync::Arc, - time::{Duration, Instant}, -}; -use tokio::{sync::Barrier, task::JoinSet}; -use utils::{id::TenantId, lsn::Lsn}; +use pageserver_api::shard::TenantShardId; +use tokio::sync::Barrier; +use tokio::task::JoinSet; +use utils::id::TenantId; +use utils::lsn::Lsn; fn bench(c: &mut Criterion) { macro_rules! bench_group { diff --git a/pageserver/benches/upload_queue.rs b/pageserver/benches/upload_queue.rs index ed5daa8ae1..8de06a6c25 100644 --- a/pageserver/benches/upload_queue.rs +++ b/pageserver/benches/upload_queue.rs @@ -1,15 +1,15 @@ //! Upload queue benchmarks. 
use std::str::FromStr as _; -use std::sync::atomic::AtomicU32; use std::sync::Arc; +use std::sync::atomic::AtomicU32; -use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use criterion::{Bencher, Criterion, criterion_group, criterion_main}; +use pageserver::tenant::IndexPart; use pageserver::tenant::metadata::TimelineMetadata; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; use pageserver::tenant::upload_queue::{Delete, UploadOp, UploadQueue, UploadTask}; -use pageserver::tenant::IndexPart; use pprof::criterion::{Output, PProfProfiler}; use utils::generation::Generation; use utils::shard::{ShardCount, ShardIndex, ShardNumber}; diff --git a/pageserver/compaction/src/helpers.rs b/pageserver/compaction/src/helpers.rs index 6b739d85a7..7e4e3042b3 100644 --- a/pageserver/compaction/src/helpers.rs +++ b/pageserver/compaction/src/helpers.rs @@ -221,12 +221,12 @@ where // performed implicitly when `top` is dropped). 
if let Some(mut top) = this.heap.peek_mut() { match top.deref_mut() { - LazyLoadLayer::Unloaded(ref mut l) => { + LazyLoadLayer::Unloaded(l) => { let fut = l.load_keys(this.ctx); this.load_future.set(Some(Box::pin(fut))); continue; } - LazyLoadLayer::Loaded(ref mut entries) => { + LazyLoadLayer::Loaded(entries) => { let result = entries.pop_front().unwrap(); if entries.is_empty() { std::collections::binary_heap::PeekMut::pop(top); diff --git a/pageserver/pagebench/src/util/request_stats.rs b/pageserver/pagebench/src/util/request_stats.rs index 4aa6950782..ebe7bc031d 100644 --- a/pageserver/pagebench/src/util/request_stats.rs +++ b/pageserver/pagebench/src/util/request_stats.rs @@ -40,9 +40,7 @@ impl Stats { } } pub(crate) fn add(&mut self, other: &Self) { - let Self { - ref mut latency_histo, - } = self; + let Self { latency_histo } = self; latency_histo.add(&other.latency_histo).unwrap(); } } diff --git a/pageserver/src/assert_u64_eq_usize.rs b/pageserver/src/assert_u64_eq_usize.rs index 66ca7fd057..c4b8d9acba 100644 --- a/pageserver/src/assert_u64_eq_usize.rs +++ b/pageserver/src/assert_u64_eq_usize.rs @@ -2,7 +2,9 @@ pub(crate) const _ASSERT_U64_EQ_USIZE: () = { if std::mem::size_of::() != std::mem::size_of::() { - panic!("the traits defined in this module assume that usize and u64 can be converted to each other without loss of information"); + panic!( + "the traits defined in this module assume that usize and u64 can be converted to each other without loss of information" + ); } }; diff --git a/pageserver/src/aux_file.rs b/pageserver/src/aux_file.rs index 5cc20a70b2..b76c0e045f 100644 --- a/pageserver/src/aux_file.rs +++ b/pageserver/src/aux_file.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use ::metrics::IntGauge; use bytes::{Buf, BufMut, Bytes}; -use pageserver_api::key::{Key, AUX_KEY_PREFIX, METADATA_KEY_SIZE}; +use pageserver_api::key::{AUX_KEY_PREFIX, Key, METADATA_KEY_SIZE}; use tracing::warn; // BEGIN Copyright (c) 2017 Servo Contributors diff --git 
a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index 99b0775316..ce54bd9c1c 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -10,33 +10,31 @@ //! This module is responsible for creation of such tarball //! from data stored in object storage. //! -use anyhow::{anyhow, Context}; -use bytes::{BufMut, Bytes, BytesMut}; -use fail::fail_point; -use pageserver_api::key::{rel_block_to_key, Key}; -use postgres_ffi::pg_constants; use std::fmt::Write as FmtWrite; use std::time::{Instant, SystemTime}; + +use anyhow::{Context, anyhow}; +use bytes::{BufMut, Bytes, BytesMut}; +use fail::fail_point; +use pageserver_api::key::{Key, rel_block_to_key}; +use pageserver_api::reltag::{RelTag, SlruKind}; +use postgres_ffi::pg_constants::{ + DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID, PG_HBA, PGDATA_SPECIAL_FILES, +}; +use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM}; +use postgres_ffi::{ + BLCKSZ, PG_TLI, RELSEG_SIZE, WAL_SEGMENT_SIZE, XLogFileName, dispatch_pgversion, pg_constants, +}; use tokio::io; use tokio::io::AsyncWrite; -use tracing::*; - use tokio_tar::{Builder, EntryType, Header}; +use tracing::*; +use utils::lsn::Lsn; use crate::context::RequestContext; use crate::pgdatadir_mapping::Version; -use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::Timeline; -use pageserver_api::reltag::{RelTag, SlruKind}; - -use postgres_ffi::dispatch_pgversion; -use postgres_ffi::pg_constants::{DEFAULTTABLESPACE_OID, GLOBALTABLESPACE_OID}; -use postgres_ffi::pg_constants::{PGDATA_SPECIAL_FILES, PG_HBA}; -use postgres_ffi::relfile_utils::{INIT_FORKNUM, MAIN_FORKNUM}; -use postgres_ffi::XLogFileName; -use postgres_ffi::PG_TLI; -use postgres_ffi::{BLCKSZ, RELSEG_SIZE, WAL_SEGMENT_SIZE}; -use utils::lsn::Lsn; +use crate::tenant::storage_layer::IoConcurrency; #[derive(Debug, thiserror::Error)] pub enum BasebackupError { diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 
e2b9a7f073..ab8d37df2e 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -3,49 +3,41 @@ //! Main entry point for the Page Server executable. use std::env; -use std::env::{var, VarError}; +use std::env::{VarError, var}; use std::io::Read; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use camino::Utf8Path; use clap::{Arg, ArgAction, Command}; - -use metrics::launch_timestamp::{set_launch_timestamp_metric, LaunchTimestamp}; -use pageserver::config::PageserverIdentity; +use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric}; +use metrics::set_build_info_metric; +use pageserver::config::{PageServerConf, PageserverIdentity}; use pageserver::controller_upcall_client::ControllerUpcallClient; +use pageserver::deletion_queue::DeletionQueue; use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task}; use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING}; -use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME}; -use pageserver::tenant::{secondary, TenantSharedResources}; -use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener}; +use pageserver::task_mgr::{ + BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME, WALRECEIVER_RUNTIME, +}; +use pageserver::tenant::{TenantSharedResources, mgr, secondary}; +use pageserver::{ + CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service, + task_mgr, virtual_file, +}; +use postgres_backend::AuthType; use remote_storage::GenericRemoteStorage; use tokio::signal::unix::SignalKind; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::*; - -use metrics::set_build_info_metric; -use pageserver::{ - config::PageServerConf, - deletion_queue::DeletionQueue, - http, page_cache, page_service, task_mgr, - task_mgr::{BACKGROUND_RUNTIME, 
MGMT_REQUEST_RUNTIME}, - tenant::mgr, - virtual_file, -}; -use postgres_backend::AuthType; +use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::crashsafe::syncfs; -use utils::failpoint_support; use utils::logging::TracingErrorLayerEnablement; -use utils::{ - auth::{JwtAuth, SwappableJwtAuth}, - logging, project_build_tag, project_git_version, - sentry_init::init_sentry, - tcp_listener, -}; +use utils::sentry_init::init_sentry; +use utils::{failpoint_support, logging, project_build_tag, project_git_version, tcp_listener}; project_git_version!(GIT_VERSION); project_build_tag!(BUILD_TAG); @@ -57,7 +49,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; const PID_FILE_NAME: &str = "pageserver.pid"; @@ -85,6 +77,9 @@ fn main() -> anyhow::Result<()> { return Ok(()); } + // Initialize up failpoints support + let scenario = failpoint_support::init(); + let workdir = arg_matches .get_one::("workdir") .map(Utf8Path::new) @@ -178,9 +173,6 @@ fn main() -> anyhow::Result<()> { } } - // Initialize up failpoints support - let scenario = failpoint_support::init(); - // Basic initialization of things that don't change after startup tracing::info!("Initializing virtual_file..."); virtual_file::init( @@ -217,7 +209,9 @@ fn initialize_config( Ok(mut f) => { let md = f.metadata().context("stat config file")?; if !md.is_file() { - anyhow::bail!("Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..."); + anyhow::bail!( + "Pageserver found identity file but it is a dir entry: {identity_file_path}. Aborting start up ..." 
+ ); } let mut s = String::new(); @@ -225,7 +219,9 @@ fn initialize_config( toml_edit::de::from_str::(&s)? } Err(e) => { - anyhow::bail!("Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..."); + anyhow::bail!( + "Pageserver could not read identity file: {identity_file_path}: {e}. Aborting start up ..." + ); } }; @@ -401,11 +397,9 @@ fn start_pageserver( Err(VarError::NotPresent) => { info!("No JWT token for authentication with Safekeeper detected"); } - Err(e) => { - return Err(e).with_context(|| { - "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable" - }) - } + Err(e) => return Err(e).with_context( + || "Failed to either load to detect non-present NEON_AUTH_TOKEN environment variable", + ), }; // Top-level cancellation token for the process @@ -711,7 +705,9 @@ async fn create_remote_storage_client( // wrapper that simulates failures. if conf.test_remote_failures > 0 { if !cfg!(feature = "testing") { - anyhow::bail!("test_remote_failures option is not available because pageserver was compiled without the 'testing' feature"); + anyhow::bail!( + "test_remote_failures option is not available because pageserver was compiled without the 'testing' feature" + ); } info!( "Simulating remote failures for first {} attempts of each op", diff --git a/pageserver/src/bin/test_helper_slow_client_reads.rs b/pageserver/src/bin/test_helper_slow_client_reads.rs index c1ce332b6c..0215dd06fb 100644 --- a/pageserver/src/bin/test_helper_slow_client_reads.rs +++ b/pageserver/src/bin/test_helper_slow_client_reads.rs @@ -1,14 +1,10 @@ -use std::{ - io::{stdin, stdout, Read, Write}, - time::Duration, -}; +use std::io::{Read, Write, stdin, stdout}; +use std::time::Duration; use clap::Parser; use pageserver_api::models::{PagestreamRequest, PagestreamTestRequest}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; #[derive(clap::Parser)] struct Args { diff 
--git a/pageserver/src/config.rs b/pageserver/src/config.rs index 09d9444dd5..64d00882b9 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -4,36 +4,29 @@ //! file, or on the command line. //! See also `settings.md` for better description on every parameter. -use anyhow::{bail, ensure, Context}; -use pageserver_api::models::ImageCompressionAlgorithm; -use pageserver_api::{ - config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes}, - shard::TenantShardId, -}; -use remote_storage::{RemotePath, RemoteStorageConfig}; use std::env; -use storage_broker::Uri; -use utils::logging::SecretString; -use utils::postgres_client::PostgresClientProtocol; - -use once_cell::sync::OnceCell; -use reqwest::Url; use std::num::NonZeroUsize; use std::sync::Arc; use std::time::Duration; +use anyhow::{Context, bail, ensure}; use camino::{Utf8Path, Utf8PathBuf}; +use once_cell::sync::OnceCell; +use pageserver_api::config::{DiskUsageEvictionTaskConfig, MaxVectoredReadBytes}; +use pageserver_api::models::ImageCompressionAlgorithm; +use pageserver_api::shard::TenantShardId; use postgres_backend::AuthType; -use utils::{ - id::{NodeId, TimelineId}, - logging::LogFormat, -}; +use remote_storage::{RemotePath, RemoteStorageConfig}; +use reqwest::Url; +use storage_broker::Uri; +use utils::id::{NodeId, TimelineId}; +use utils::logging::{LogFormat, SecretString}; +use utils::postgres_client::PostgresClientProtocol; use crate::tenant::storage_layer::inmemory_layer::IndexEntry; use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; -use crate::virtual_file; use crate::virtual_file::io_engine; -use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME}; +use crate::{TENANT_HEATMAP_BASENAME, TENANT_LOCATION_CONFIG_NAME, virtual_file}; /// Global state of pageserver. 
/// @@ -440,7 +433,9 @@ impl PageServerConf { io_engine::FeatureTestResult::PlatformPreferred(v) => v, // make no noise io_engine::FeatureTestResult::Worse { engine, remark } => { // TODO: bubble this up to the caller so we can tracing::warn! it. - eprintln!("auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}"); + eprintln!( + "auto-detected IO engine is not platform-preferred: engine={engine:?} remark={remark:?}" + ); engine } }, diff --git a/pageserver/src/consumption_metrics.rs b/pageserver/src/consumption_metrics.rs index 7e8c00c293..0231190e69 100644 --- a/pageserver/src/consumption_metrics.rs +++ b/pageserver/src/consumption_metrics.rs @@ -1,13 +1,9 @@ //! Periodically collect consumption metrics for all active tenants //! and push them to a HTTP endpoint. -use crate::config::PageServerConf; -use crate::consumption_metrics::metrics::MetricsKey; -use crate::consumption_metrics::upload::KeyGen as _; -use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}; -use crate::tenant::size::CalculateSyntheticSizeError; -use crate::tenant::tasks::BackgroundLoopKind; -use crate::tenant::{mgr::TenantManager, LogicalSizeCalculationCause, Tenant}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; + use camino::Utf8PathBuf; use consumption_metrics::EventType; use itertools::Itertools as _; @@ -15,14 +11,21 @@ use pageserver_api::models::TenantState; use remote_storage::{GenericRemoteStorage, RemoteStorageConfig}; use reqwest::Url; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; use tracing::*; use utils::id::NodeId; +use crate::config::PageServerConf; +use crate::consumption_metrics::metrics::MetricsKey; +use crate::consumption_metrics::upload::KeyGen as _; +use 
crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; +use crate::tenant::mgr::TenantManager; +use crate::tenant::size::CalculateSyntheticSizeError; +use crate::tenant::tasks::BackgroundLoopKind; +use crate::tenant::{LogicalSizeCalculationCause, Tenant}; + mod disk_cache; mod metrics; mod upload; diff --git a/pageserver/src/consumption_metrics/disk_cache.rs b/pageserver/src/consumption_metrics/disk_cache.rs index 54a505a134..f1dad8793d 100644 --- a/pageserver/src/consumption_metrics/disk_cache.rs +++ b/pageserver/src/consumption_metrics/disk_cache.rs @@ -1,10 +1,10 @@ -use anyhow::Context; -use camino::{Utf8Path, Utf8PathBuf}; use std::sync::Arc; -use crate::consumption_metrics::NewMetricsRefRoot; +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; use super::{NewMetricsRoot, NewRawMetric, RawMetric}; +use crate::consumption_metrics::NewMetricsRefRoot; pub(super) fn read_metrics_from_serde_value( json_value: serde_json::Value, diff --git a/pageserver/src/consumption_metrics/metrics.rs b/pageserver/src/consumption_metrics/metrics.rs index 07fac09f6f..71910011ea 100644 --- a/pageserver/src/consumption_metrics/metrics.rs +++ b/pageserver/src/consumption_metrics/metrics.rs @@ -1,15 +1,16 @@ -use crate::tenant::mgr::TenantManager; -use crate::{context::RequestContext, tenant::timeline::logical_size::CurrentLogicalSize}; +use std::sync::Arc; +use std::time::SystemTime; + use chrono::{DateTime, Utc}; use consumption_metrics::EventType; use futures::stream::StreamExt; -use std::{sync::Arc, time::SystemTime}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use super::{Cache, NewRawMetric}; +use crate::context::RequestContext; +use crate::tenant::mgr::TenantManager; +use crate::tenant::timeline::logical_size::CurrentLogicalSize; /// Name of the metric, used by `MetricsKey` factory methods and `deserialize_cached_events` /// instead of 
static str. diff --git a/pageserver/src/consumption_metrics/metrics/tests.rs b/pageserver/src/consumption_metrics/metrics/tests.rs index 3ed7b44123..52b4fb8680 100644 --- a/pageserver/src/consumption_metrics/metrics/tests.rs +++ b/pageserver/src/consumption_metrics/metrics/tests.rs @@ -1,7 +1,7 @@ -use crate::consumption_metrics::RawMetric; +use std::collections::HashMap; use super::*; -use std::collections::HashMap; +use crate::consumption_metrics::RawMetric; #[test] fn startup_collected_timeline_metrics_before_advancing() { diff --git a/pageserver/src/consumption_metrics/upload.rs b/pageserver/src/consumption_metrics/upload.rs index 448bf47525..59e0145a5b 100644 --- a/pageserver/src/consumption_metrics/upload.rs +++ b/pageserver/src/consumption_metrics/upload.rs @@ -2,15 +2,16 @@ use std::error::Error as _; use std::time::SystemTime; use chrono::{DateTime, Utc}; -use consumption_metrics::{Event, EventChunk, IdempotencyKey, CHUNK_SIZE}; +use consumption_metrics::{CHUNK_SIZE, Event, EventChunk, IdempotencyKey}; use remote_storage::{GenericRemoteStorage, RemotePath}; use tokio::io::AsyncWriteExt; use tokio_util::sync::CancellationToken; use tracing::Instrument; - -use super::{metrics::Name, Cache, MetricsKey, NewRawMetric, RawMetric}; use utils::id::{TenantId, TimelineId}; +use super::metrics::Name; +use super::{Cache, MetricsKey, NewRawMetric, RawMetric}; + /// How the metrics from pageserver are identified. 
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone, Copy, PartialEq)] struct Ids { @@ -438,14 +439,13 @@ async fn upload( #[cfg(test)] mod tests { - use crate::consumption_metrics::{ - disk_cache::read_metrics_from_serde_value, NewMetricsRefRoot, - }; - - use super::*; use chrono::{DateTime, Utc}; use once_cell::sync::Lazy; + use super::*; + use crate::consumption_metrics::NewMetricsRefRoot; + use crate::consumption_metrics::disk_cache::read_metrics_from_serde_value; + #[test] fn chunked_serialization() { let examples = metric_samples(); diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index 4990f17b40..8462594607 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -1,21 +1,23 @@ use std::collections::HashMap; use futures::Future; -use pageserver_api::{ - controller_api::{AvailabilityZone, NodeRegisterRequest}, - shard::TenantShardId, - upcall_api::{ - ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, - ValidateRequestTenant, ValidateResponse, - }, +use pageserver_api::config::NodeMetadata; +use pageserver_api::controller_api::{AvailabilityZone, NodeRegisterRequest}; +use pageserver_api::shard::TenantShardId; +use pageserver_api::upcall_api::{ + ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, + ValidateRequestTenant, ValidateResponse, }; -use serde::{de::DeserializeOwned, Serialize}; +use serde::Serialize; +use serde::de::DeserializeOwned; use tokio_util::sync::CancellationToken; use url::Url; -use utils::{backoff, failpoint_support, generation::Generation, id::NodeId}; +use utils::generation::Generation; +use utils::id::NodeId; +use utils::{backoff, failpoint_support}; -use crate::{config::PageServerConf, virtual_file::on_fatal_io_error}; -use pageserver_api::config::NodeMetadata; +use crate::config::PageServerConf; +use crate::virtual_file::on_fatal_io_error; /// The Pageserver's client for 
using the storage controller upcall API: this is a small API /// for dealing with generations (see docs/rfcs/025-generation-numbers.md). @@ -157,14 +159,18 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { match az_id_from_metadata { Some(az_id) => Some(AvailabilityZone(az_id)), None => { - tracing::warn!("metadata.json does not contain an 'availability_zone_id' field"); + tracing::warn!( + "metadata.json does not contain an 'availability_zone_id' field" + ); conf.availability_zone.clone().map(AvailabilityZone) } } }; if az_id.is_none() { - panic!("Availablity zone id could not be inferred from metadata.json or pageserver config"); + panic!( + "Availablity zone id could not be inferred from metadata.json or pageserver config" + ); } Some(NodeRegisterRequest { @@ -236,7 +242,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { .iter() .map(|(id, generation)| ValidateRequestTenant { id: *id, - gen: (*generation).into().expect( + r#gen: (*generation).into().expect( "Generation should always be valid for a Tenant doing deletions", ), }) diff --git a/pageserver/src/deletion_queue.rs b/pageserver/src/deletion_queue.rs index a2395b0dca..8118f66252 100644 --- a/pageserver/src/deletion_queue.rs +++ b/pageserver/src/deletion_queue.rs @@ -6,38 +6,31 @@ use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; -use crate::controller_upcall_client::ControlPlaneGenerationsApi; -use crate::metrics; -use crate::tenant::remote_timeline_client::remote_timeline_path; -use crate::tenant::remote_timeline_client::LayerFileMetadata; -use crate::virtual_file::MaybeFatalIo; -use crate::virtual_file::VirtualFile; use anyhow::Context; use camino::Utf8PathBuf; +use deleter::DeleterMessage; +use list_writer::ListWriterQueueMessage; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, RemotePath}; -use serde::Deserialize; -use serde::Serialize; +use serde::{Deserialize, Serialize}; use thiserror::Error; use 
tokio_util::sync::CancellationToken; -use tracing::Instrument; -use tracing::{debug, error}; +use tracing::{Instrument, debug, error}; use utils::crashsafe::path_with_suffix_extension; use utils::generation::Generation; use utils::id::TimelineId; -use utils::lsn::AtomicLsn; -use utils::lsn::Lsn; - -use self::deleter::Deleter; -use self::list_writer::DeletionOp; -use self::list_writer::ListWriter; -use self::list_writer::RecoverOp; -use self::validator::Validator; -use deleter::DeleterMessage; -use list_writer::ListWriterQueueMessage; +use utils::lsn::{AtomicLsn, Lsn}; use validator::ValidatorQueueMessage; -use crate::{config::PageServerConf, tenant::storage_layer::LayerName}; +use self::deleter::Deleter; +use self::list_writer::{DeletionOp, ListWriter, RecoverOp}; +use self::validator::Validator; +use crate::config::PageServerConf; +use crate::controller_upcall_client::ControlPlaneGenerationsApi; +use crate::metrics; +use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_timeline_path}; +use crate::tenant::storage_layer::LayerName; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; // TODO: configurable for how long to wait before executing deletions @@ -664,21 +657,22 @@ impl DeletionQueue { #[cfg(test)] mod test { + use std::io::ErrorKind; + use std::time::Duration; + use camino::Utf8Path; use hex_literal::hex; - use pageserver_api::{key::Key, shard::ShardIndex, upcall_api::ReAttachResponseTenant}; - use std::{io::ErrorKind, time::Duration}; - use tracing::info; - + use pageserver_api::key::Key; + use pageserver_api::shard::ShardIndex; + use pageserver_api::upcall_api::ReAttachResponseTenant; use remote_storage::{RemoteStorageConfig, RemoteStorageKind}; use tokio::task::JoinHandle; - - use crate::{ - controller_upcall_client::RetryForeverError, - tenant::{harness::TenantHarness, storage_layer::DeltaLayerName}, - }; + use tracing::info; use super::*; + use crate::controller_upcall_client::RetryForeverError; + use 
crate::tenant::harness::TenantHarness; + use crate::tenant::storage_layer::DeltaLayerName; pub const TIMELINE_ID: TimelineId = TimelineId::from_array(hex!("11223344556677881122334455667788")); @@ -724,26 +718,26 @@ mod test { .expect("Failed to join workers for previous deletion queue"); } - fn set_latest_generation(&self, gen: Generation) { + fn set_latest_generation(&self, gen_: Generation) { let tenant_shard_id = self.harness.tenant_shard_id; self.mock_control_plane .latest_generation .lock() .unwrap() - .insert(tenant_shard_id, gen); + .insert(tenant_shard_id, gen_); } /// Returns remote layer file name, suitable for use in assert_remote_files fn write_remote_layer( &self, file_name: LayerName, - gen: Generation, + gen_: Generation, ) -> anyhow::Result { let tenant_shard_id = self.harness.tenant_shard_id; let relative_remote_path = remote_timeline_path(&tenant_shard_id, &TIMELINE_ID); let remote_timeline_path = self.remote_fs_dir.join(relative_remote_path.get_path()); std::fs::create_dir_all(&remote_timeline_path)?; - let remote_layer_file_name = format!("{}{}", file_name, gen.get_suffix()); + let remote_layer_file_name = format!("{}{}", file_name, gen_.get_suffix()); let content: Vec = format!("placeholder contents of {file_name}").into(); @@ -1098,11 +1092,12 @@ mod test { /// or coalescing, and doesn't actually execute any deletions unless you call pump() to kick it. #[cfg(test)] pub(crate) mod mock { + use std::sync::atomic::{AtomicUsize, Ordering}; + use tracing::info; use super::*; use crate::tenant::remote_timeline_client::remote_layer_path; - use std::sync::atomic::{AtomicUsize, Ordering}; pub struct ConsumerState { rx: tokio::sync::mpsc::UnboundedReceiver, diff --git a/pageserver/src/deletion_queue/deleter.rs b/pageserver/src/deletion_queue/deleter.rs index ef1dfbac19..691ba75cc7 100644 --- a/pageserver/src/deletion_queue/deleter.rs +++ b/pageserver/src/deletion_queue/deleter.rs @@ -6,21 +6,16 @@ //! 
number of full-sized DeleteObjects requests, rather than a larger number of //! smaller requests. -use remote_storage::GenericRemoteStorage; -use remote_storage::RemotePath; -use remote_storage::TimeoutOrCancel; use std::time::Duration; + +use remote_storage::{GenericRemoteStorage, RemotePath, TimeoutOrCancel}; use tokio_util::sync::CancellationToken; -use tracing::info; -use tracing::warn; -use utils::backoff; -use utils::pausable_failpoint; +use tracing::{info, warn}; +use utils::{backoff, pausable_failpoint}; +use super::{DeletionQueueError, FlushOp}; use crate::metrics; -use super::DeletionQueueError; -use super::FlushOp; - const AUTOFLUSH_INTERVAL: Duration = Duration::from_secs(10); pub(super) enum DeleterMessage { diff --git a/pageserver/src/deletion_queue/list_writer.rs b/pageserver/src/deletion_queue/list_writer.rs index ae3b2c9180..a385e35a02 100644 --- a/pageserver/src/deletion_queue/list_writer.rs +++ b/pageserver/src/deletion_queue/list_writer.rs @@ -10,11 +10,6 @@ //! //! DeletionLists are passed onwards to the Validator. 
-use super::DeletionHeader; -use super::DeletionList; -use super::FlushOp; -use super::ValidatorQueueMessage; - use std::collections::HashMap; use std::fs::create_dir_all; use std::time::Duration; @@ -23,20 +18,17 @@ use pageserver_api::shard::TenantShardId; use regex::Regex; use remote_storage::RemotePath; use tokio_util::sync::CancellationToken; -use tracing::debug; -use tracing::info; -use tracing::warn; +use tracing::{debug, info, warn}; use utils::generation::Generation; use utils::id::TimelineId; +use super::{DeletionHeader, DeletionList, FlushOp, ValidatorQueueMessage}; use crate::config::PageServerConf; use crate::deletion_queue::TEMP_SUFFIX; use crate::metrics; -use crate::tenant::remote_timeline_client::remote_layer_path; -use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::remote_timeline_client::{LayerFileMetadata, remote_layer_path}; use crate::tenant::storage_layer::LayerName; -use crate::virtual_file::on_fatal_io_error; -use crate::virtual_file::MaybeFatalIo; +use crate::virtual_file::{MaybeFatalIo, on_fatal_io_error}; // The number of keys in a DeletionList before we will proactively persist it // (without reaching a flush deadline). 
This aims to deliver objects of the order diff --git a/pageserver/src/deletion_queue/validator.rs b/pageserver/src/deletion_queue/validator.rs index 1d55581ebd..b0ce2b80b4 100644 --- a/pageserver/src/deletion_queue/validator.rs +++ b/pageserver/src/deletion_queue/validator.rs @@ -20,22 +20,14 @@ use std::time::Duration; use camino::Utf8PathBuf; use tokio_util::sync::CancellationToken; -use tracing::debug; -use tracing::info; -use tracing::warn; - -use crate::config::PageServerConf; -use crate::controller_upcall_client::ControlPlaneGenerationsApi; -use crate::controller_upcall_client::RetryForeverError; -use crate::metrics; -use crate::virtual_file::MaybeFatalIo; +use tracing::{debug, info, warn}; use super::deleter::DeleterMessage; -use super::DeletionHeader; -use super::DeletionList; -use super::DeletionQueueError; -use super::FlushOp; -use super::VisibleLsnUpdates; +use super::{DeletionHeader, DeletionList, DeletionQueueError, FlushOp, VisibleLsnUpdates}; +use crate::config::PageServerConf; +use crate::controller_upcall_client::{ControlPlaneGenerationsApi, RetryForeverError}; +use crate::metrics; +use crate::virtual_file::MaybeFatalIo; // After this length of time, do any validation work that is pending, // even if we haven't accumulated many keys to delete. 
@@ -190,7 +182,10 @@ where } } else { // If we failed validation, then do not apply any of the projected updates - info!("Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", tenant_lsn_state.generation); + info!( + "Dropped remote consistent LSN updates for tenant {tenant_id} in stale generation {:?}", + tenant_lsn_state.generation + ); metrics::DELETION_QUEUE.dropped_lsn_updates.inc(); } } diff --git a/pageserver/src/disk_usage_eviction_task.rs b/pageserver/src/disk_usage_eviction_task.rs index 738a783813..13252037e5 100644 --- a/pageserver/src/disk_usage_eviction_task.rs +++ b/pageserver/src/disk_usage_eviction_task.rs @@ -41,30 +41,31 @@ // - The `#[allow(dead_code)]` above various structs are to suppress warnings about only the Debug impl // reading these fields. We use the Debug impl for semi-structured logging, though. -use std::{sync::Arc, time::SystemTime}; +use std::sync::Arc; +use std::time::SystemTime; use anyhow::Context; -use pageserver_api::{config::DiskUsageEvictionTaskConfig, shard::TenantShardId}; +use pageserver_api::config::DiskUsageEvictionTaskConfig; +use pageserver_api::shard::TenantShardId; use remote_storage::GenericRemoteStorage; use serde::Serialize; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, instrument, warn, Instrument}; -use utils::{completion, id::TimelineId}; +use tracing::{Instrument, debug, error, info, instrument, warn}; +use utils::completion; +use utils::id::TimelineId; -use crate::{ - config::PageServerConf, - metrics::disk_usage_based_eviction::METRICS, - task_mgr::{self, BACKGROUND_RUNTIME}, - tenant::{ - mgr::TenantManager, - remote_timeline_client::LayerFileMetadata, - secondary::SecondaryTenant, - storage_layer::{AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint}, - tasks::sleep_random, - }, - CancellableTask, DiskUsageEvictionTask, +use crate::config::PageServerConf; +use 
crate::metrics::disk_usage_based_eviction::METRICS; +use crate::task_mgr::{self, BACKGROUND_RUNTIME}; +use crate::tenant::mgr::TenantManager; +use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::secondary::SecondaryTenant; +use crate::tenant::storage_layer::{ + AsLayerDesc, EvictionError, Layer, LayerName, LayerVisibilityHint, }; +use crate::tenant::tasks::sleep_random; +use crate::{CancellableTask, DiskUsageEvictionTask}; /// Selects the sort order for eviction candidates *after* per tenant `min_resident_size` /// partitioning. @@ -1007,10 +1008,14 @@ async fn collect_eviction_candidates( } } - debug_assert!(EvictionPartition::Above < EvictionPartition::Below, - "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"); - debug_assert!(EvictionPartition::EvictNow < EvictionPartition::Above, - "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first"); + debug_assert!( + EvictionPartition::Above < EvictionPartition::Below, + "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first" + ); + debug_assert!( + EvictionPartition::EvictNow < EvictionPartition::Above, + "as explained in the function's doc comment, layers that aren't in the tenant's min_resident_size are evicted first" + ); eviction_order.sort(&mut candidates); @@ -1157,9 +1162,8 @@ mod filesystem_level_usage { use anyhow::Context; use camino::Utf8Path; - use crate::statvfs::Statvfs; - use super::DiskUsageEvictionTaskConfig; + use crate::statvfs::Statvfs; #[derive(Debug, Clone, Copy)] pub struct Usage<'a> { @@ -1224,10 +1228,12 @@ mod filesystem_level_usage { #[test] fn max_usage_pct_pressure() { - use super::Usage as _; use std::time::Duration; + use utils::serde_percent::Percent; + use super::Usage as _; + let mut usage = Usage { config: &DiskUsageEvictionTaskConfig { max_usage_pct: 
Percent::new(85).unwrap(), diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 9f37fc32a3..dd5a24a41f 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2,125 +2,83 @@ //! Management HTTP API //! use std::cmp::Reverse; -use std::collections::BinaryHeap; -use std::collections::HashMap; +use std::collections::{BinaryHeap, HashMap}; use std::str::FromStr; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context, Result}; +use anyhow::{Context, Result, anyhow}; use enumset::EnumSet; use futures::future::join_all; -use futures::StreamExt; -use futures::TryFutureExt; +use futures::{StreamExt, TryFutureExt}; use http_utils::endpoint::{ - profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, + self, attach_openapi_ui, auth_middleware, check_permission_with, profile_cpu_handler, + profile_heap_handler, prometheus_metrics_handler, request_span, }; +use http_utils::error::{ApiError, HttpErrorBody}; use http_utils::failpoints::failpoints_handler; -use http_utils::request::must_parse_query_param; -use http_utils::request::{get_request_param, must_get_query_param, parse_query_param}; +use http_utils::json::{json_request, json_request_maybe, json_response}; +use http_utils::request::{ + get_request_param, must_get_query_param, must_parse_query_param, parse_query_param, + parse_request_param, +}; +use http_utils::{RequestExt, RouterBuilder}; use humantime::format_rfc3339; -use hyper::header; -use hyper::StatusCode; -use hyper::{Body, Request, Response, Uri}; +use hyper::{Body, Request, Response, StatusCode, Uri, header}; use metrics::launch_timestamp::LaunchTimestamp; use pageserver_api::models::virtual_file::IoMode; -use pageserver_api::models::DownloadRemoteLayersTaskSpawnRequest; -use pageserver_api::models::IngestAuxFilesRequest; -use pageserver_api::models::ListAuxFilesRequest; -use pageserver_api::models::LocationConfig; -use 
pageserver_api::models::LocationConfigListResponse; -use pageserver_api::models::LocationConfigMode; -use pageserver_api::models::LsnLease; -use pageserver_api::models::LsnLeaseRequest; -use pageserver_api::models::OffloadedTimelineInfo; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::models::ShardParameters; -use pageserver_api::models::TenantConfigPatchRequest; -use pageserver_api::models::TenantDetails; -use pageserver_api::models::TenantLocationConfigRequest; -use pageserver_api::models::TenantLocationConfigResponse; -use pageserver_api::models::TenantScanRemoteStorageResponse; -use pageserver_api::models::TenantScanRemoteStorageShard; -use pageserver_api::models::TenantShardLocation; -use pageserver_api::models::TenantShardSplitRequest; -use pageserver_api::models::TenantShardSplitResponse; -use pageserver_api::models::TenantSorting; -use pageserver_api::models::TenantState; -use pageserver_api::models::TenantWaitLsnRequest; -use pageserver_api::models::TimelineArchivalConfigRequest; -use pageserver_api::models::TimelineCreateRequestMode; -use pageserver_api::models::TimelineCreateRequestModeImportPgdata; -use pageserver_api::models::TimelinesInfoAndOffloaded; -use pageserver_api::models::TopTenantShardItem; -use pageserver_api::models::TopTenantShardsRequest; -use pageserver_api::models::TopTenantShardsResponse; -use pageserver_api::shard::ShardCount; -use pageserver_api::shard::TenantShardId; -use remote_storage::DownloadError; -use remote_storage::GenericRemoteStorage; -use remote_storage::TimeTravelError; +use pageserver_api::models::{ + DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest, + LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest, + OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse, + TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo, + TenantLocationConfigRequest, TenantLocationConfigResponse, 
TenantScanRemoteStorageResponse, + TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest, + TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest, + TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode, + TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo, + TimelinesInfoAndOffloaded, TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse, +}; +use pageserver_api::shard::{ShardCount, TenantShardId}; +use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError}; use scopeguard::defer; -use tenant_size_model::{svg::SvgBranchKind, SizeResult, StorageModel}; +use tenant_size_model::svg::SvgBranchKind; +use tenant_size_model::{SizeResult, StorageModel}; use tokio::time::Instant; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::auth::SwappableJwtAuth; +use utils::generation::Generation; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use crate::config::PageServerConf; -use crate::context::RequestContextBuilder; -use crate::context::{DownloadBehavior, RequestContext}; +use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; use crate::deletion_queue::DeletionQueueClient; use crate::pgdatadir_mapping::LsnForTimestamp; use crate::task_mgr::TaskKind; use crate::tenant::config::{LocationConf, TenantConfOpt}; -use crate::tenant::mgr::GetActiveTenantError; use crate::tenant::mgr::{ - GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlotError, - TenantSlotUpsertError, TenantStateError, + GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, + TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError, +}; +use crate::tenant::remote_timeline_client::{ + download_index_part, list_remote_tenant_shards, list_remote_timelines, }; -use crate::tenant::mgr::{TenantSlot, UpsertLocationError}; 
-use crate::tenant::remote_timeline_client; -use crate::tenant::remote_timeline_client::download_index_part; -use crate::tenant::remote_timeline_client::list_remote_tenant_shards; -use crate::tenant::remote_timeline_client::list_remote_timelines; use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; -use crate::tenant::storage_layer::IoConcurrency; -use crate::tenant::storage_layer::LayerAccessStatsReset; -use crate::tenant::storage_layer::LayerName; -use crate::tenant::timeline::import_pgdata; -use crate::tenant::timeline::offload::offload_timeline; -use crate::tenant::timeline::offload::OffloadError; -use crate::tenant::timeline::CompactFlags; -use crate::tenant::timeline::CompactOptions; -use crate::tenant::timeline::CompactRequest; -use crate::tenant::timeline::CompactionError; -use crate::tenant::timeline::Timeline; -use crate::tenant::timeline::WaitLsnTimeout; -use crate::tenant::timeline::WaitLsnWaiter; -use crate::tenant::GetTimelineError; -use crate::tenant::OffloadedTimeline; -use crate::tenant::{LogicalSizeCalculationCause, PageReconstructError}; -use crate::DEFAULT_PG_VERSION; -use crate::{disk_usage_eviction_task, tenant}; -use http_utils::{ - endpoint::{self, attach_openapi_ui, auth_middleware, check_permission_with}, - error::{ApiError, HttpErrorBody}, - json::{json_request, json_request_maybe, json_response}, - request::parse_request_param, - RequestExt, RouterBuilder, +use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName}; +use crate::tenant::timeline::offload::{OffloadError, offload_timeline}; +use crate::tenant::timeline::{ + CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout, + WaitLsnWaiter, import_pgdata, }; -use pageserver_api::models::{ - StatusResponse, TenantConfigRequest, TenantInfo, TimelineCreateRequest, TimelineGcRequest, - TimelineInfo, -}; -use utils::{ - auth::SwappableJwtAuth, - generation::Generation, - id::{TenantId, TimelineId}, 
- lsn::Lsn, +use crate::tenant::{ + GetTimelineError, LogicalSizeCalculationCause, OffloadedTimeline, PageReconstructError, + remote_timeline_client, }; +use crate::{DEFAULT_PG_VERSION, disk_usage_eviction_task, tenant}; // For APIs that require an Active tenant, how long should we block waiting for that state? // This is not functionally necessary (clients will retry), but avoids generating a lot of @@ -1128,12 +1086,12 @@ async fn tenant_list_handler( ApiError::ResourceUnavailable("Tenant map is initializing or shutting down".into()) })? .iter() - .map(|(id, state, gen)| TenantInfo { + .map(|(id, state, gen_)| TenantInfo { id: *id, state: state.clone(), current_physical_size: None, attachment_status: state.attachment_status(), - generation: (*gen) + generation: (*gen_) .into() .expect("Tenants are always attached with a generation"), gc_blocking: None, @@ -1670,9 +1628,8 @@ async fn block_or_unblock_gc( request: Request, block: bool, ) -> Result, ApiError> { - use crate::tenant::{ - remote_timeline_client::WaitCompletionError, upload_queue::NotInitialized, - }; + use crate::tenant::remote_timeline_client::WaitCompletionError; + use crate::tenant::upload_queue::NotInitialized; let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -2058,7 +2015,9 @@ async fn tenant_time_travel_remote_storage_handler( ))); } - tracing::info!("Issuing time travel request internally. timestamp={timestamp_raw}, done_if_after={done_if_after_raw}"); + tracing::info!( + "Issuing time travel request internally. 
timestamp={timestamp_raw}, done_if_after={done_if_after_raw}" + ); remote_timeline_client::upload::time_travel_recover_tenant( &state.remote_storage, @@ -2459,9 +2418,10 @@ async fn timeline_detach_ancestor_handler( request: Request, _cancel: CancellationToken, ) -> Result, ApiError> { - use crate::tenant::timeline::detach_ancestor; use pageserver_api::models::detach_ancestor::AncestorDetached; + use crate::tenant::timeline::detach_ancestor; + let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; @@ -2806,14 +2766,19 @@ async fn tenant_scan_remote_handler( .await { Ok((index_part, index_generation, _index_mtime)) => { - tracing::info!("Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", - index_part.layer_metadata.len(), index_part.metadata.disk_consistent_lsn()); + tracing::info!( + "Found timeline {tenant_shard_id}/{timeline_id} metadata (gen {index_generation:?}, {} layers, {} consistent LSN)", + index_part.layer_metadata.len(), + index_part.metadata.disk_consistent_lsn() + ); generation = std::cmp::max(generation, index_generation); } Err(DownloadError::NotFound) => { // This is normal for tenants that were created with multiple shards: they have an unsharded path // containing the timeline's initdb tarball but no index. Otherwise it is a bit strange. 
- tracing::info!("Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping"); + tracing::info!( + "Timeline path {tenant_shard_id}/{timeline_id} exists in remote storage but has no index, skipping" + ); continue; } Err(e) => { @@ -3432,7 +3397,9 @@ async fn read_tar_eof(mut reader: (impl tokio::io::AsyncRead + Unpin)) -> anyhow anyhow::bail!("unexpected non-zero bytes after the tar archive"); } if trailing_bytes % 512 != 0 { - anyhow::bail!("unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive"); + anyhow::bail!( + "unexpected number of zeros ({trailing_bytes}), not divisible by tar block size (512 bytes), after the tar archive" + ); } Ok(()) } diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index a73fa5cec8..6dd005de50 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -4,14 +4,22 @@ //! use std::path::{Path, PathBuf}; -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{Context, Result, bail, ensure}; use bytes::Bytes; use camino::Utf8Path; use futures::StreamExt; use pageserver_api::key::rel_block_to_key; +use pageserver_api::reltag::{RelTag, SlruKind}; +use postgres_ffi::relfile_utils::*; +use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::{ + BLCKSZ, ControlFileData, DBState_DB_SHUTDOWNED, Oid, WAL_SEGMENT_SIZE, XLogFileName, + pg_constants, +}; use tokio::io::{AsyncRead, AsyncReadExt}; use tokio_tar::Archive; use tracing::*; +use utils::lsn::Lsn; use wal_decoder::models::InterpretedWalRecord; use walkdir::WalkDir; @@ -20,16 +28,6 @@ use crate::metrics::WAL_INGEST; use crate::pgdatadir_mapping::*; use crate::tenant::Timeline; use crate::walingest::WalIngest; -use pageserver_api::reltag::{RelTag, SlruKind}; -use postgres_ffi::pg_constants; -use postgres_ffi::relfile_utils::*; -use postgres_ffi::waldecoder::WalStreamDecoder; -use postgres_ffi::ControlFileData; -use 
postgres_ffi::DBState_DB_SHUTDOWNED; -use postgres_ffi::Oid; -use postgres_ffi::XLogFileName; -use postgres_ffi::{BLCKSZ, WAL_SEGMENT_SIZE}; -use utils::lsn::Lsn; // Returns checkpoint LSN from controlfile pub fn get_lsn_from_controlfile(path: &Utf8Path) -> Result { diff --git a/pageserver/src/l0_flush.rs b/pageserver/src/l0_flush.rs index 491c9fb96c..6cfecef0cf 100644 --- a/pageserver/src/l0_flush.rs +++ b/pageserver/src/l0_flush.rs @@ -1,4 +1,5 @@ -use std::{num::NonZeroUsize, sync::Arc}; +use std::num::NonZeroUsize; +use std::sync::Arc; #[derive(Debug, PartialEq, Eq, Clone)] pub enum L0FlushConfig { diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index f43cd08cf7..02767055fb 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -15,7 +15,8 @@ pub mod l0_flush; extern crate hyper0 as hyper; -use futures::{stream::FuturesUnordered, StreamExt}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; pub use pageserver_api::keyspace; use tokio_util::sync::CancellationToken; mod assert_u64_eq_usize; @@ -35,10 +36,8 @@ pub mod walredo; use camino::Utf8Path; use deletion_queue::DeletionQueue; -use tenant::{ - mgr::{BackgroundPurges, TenantManager}, - secondary, -}; +use tenant::mgr::{BackgroundPurges, TenantManager}; +use tenant::secondary; use tracing::{info, info_span}; /// Current storage format version @@ -350,9 +349,10 @@ async fn timed_after_cancellation( #[cfg(test)] mod timed_tests { - use super::timed; use std::time::Duration; + use super::timed; + #[tokio::test] async fn timed_completes_when_inner_future_completes() { // A future that completes on time should have its result returned diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index e1c26b0684..eb8a9b8e24 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -10,11 +10,11 @@ use std::time::{Duration, Instant}; use enum_map::{Enum as _, EnumMap}; use futures::Future; use metrics::{ + Counter, CounterVec, Gauge, GaugeVec, Histogram, 
HistogramVec, IntCounter, IntCounterPair, + IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec, - Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, - IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec, }; use once_cell::sync::Lazy; use pageserver_api::config::{ @@ -24,9 +24,8 @@ use pageserver_api::config::{ use pageserver_api::models::InMemoryLayerInfo; use pageserver_api::shard::TenantShardId; use pin_project_lite::pin_project; -use postgres_backend::{is_expected_io_error, QueryError}; +use postgres_backend::{QueryError, is_expected_io_error}; use pq_proto::framed::ConnectionError; - use strum::{EnumCount, IntoEnumIterator as _, VariantNames}; use strum_macros::{IntoStaticStr, VariantNames}; use utils::id::TimelineId; @@ -35,12 +34,12 @@ use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext}; use crate::pgdatadir_mapping::DatadirModificationStats; use crate::task_mgr::TaskKind; +use crate::tenant::Timeline; use crate::tenant::layer_map::LayerMap; use crate::tenant::mgr::TenantSlot; use crate::tenant::storage_layer::{InMemoryLayer, PersistentLayerDesc}; use crate::tenant::tasks::BackgroundLoopKind; use crate::tenant::throttle::ThrottleResult; -use crate::tenant::Timeline; /// Prometheus histogram buckets (in seconds) for operations in the critical /// path. 
In other words, operations that directly affect that latency of user @@ -363,7 +362,7 @@ pub(crate) static PAGE_CACHE_SIZE: Lazy = pub(crate) mod page_cache_eviction_metrics { use std::num::NonZeroUsize; - use metrics::{register_int_counter_vec, IntCounter, IntCounterVec}; + use metrics::{IntCounter, IntCounterVec, register_int_counter_vec}; use once_cell::sync::Lazy; #[derive(Clone, Copy)] @@ -722,7 +721,7 @@ pub(crate) static RELSIZE_CACHE_MISSES_OLD: Lazy = Lazy::new(|| { }); pub(crate) mod initial_logical_size { - use metrics::{register_int_counter, register_int_counter_vec, IntCounter, IntCounterVec}; + use metrics::{IntCounter, IntCounterVec, register_int_counter, register_int_counter_vec}; use once_cell::sync::Lazy; pub(crate) struct StartCalculation(IntCounterVec); @@ -1105,12 +1104,17 @@ impl EvictionsWithLowResidenceDuration { // - future "drop panick => abort" // // so just nag: (the error has the labels) - tracing::warn!("failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}"); + tracing::warn!( + "failed to remove EvictionsWithLowResidenceDuration, it was already removed? {e:#?}" + ); } Ok(()) => { // to help identify cases where we double-remove the same values, let's log all // deletions? 
- tracing::info!("removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", self.data_source); + tracing::info!( + "removed EvictionsWithLowResidenceDuration with {tenant_id}, {timeline_id}, {}, {threshold}", + self.data_source + ); } } } @@ -3574,12 +3578,10 @@ impl>, O, E> Future for MeasuredRemoteOp { } pub mod tokio_epoll_uring { - use std::{ - collections::HashMap, - sync::{Arc, Mutex}, - }; + use std::collections::HashMap; + use std::sync::{Arc, Mutex}; - use metrics::{register_histogram, register_int_counter, Histogram, LocalHistogram, UIntGauge}; + use metrics::{Histogram, LocalHistogram, UIntGauge, register_histogram, register_int_counter}; use once_cell::sync::Lazy; /// Shared storage for tokio-epoll-uring thread local metrics. @@ -3588,7 +3590,9 @@ pub mod tokio_epoll_uring { let slots_submission_queue_depth = register_histogram!( "pageserver_tokio_epoll_uring_slots_submission_queue_depth", "The slots waiters queue depth of each tokio_epoll_uring system", - vec![1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0], + vec![ + 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 + ], ) .expect("failed to define a metric"); ThreadLocalMetricsStorage { @@ -3765,7 +3769,7 @@ pub mod tokio_epoll_uring { } pub(crate) mod tenant_throttling { - use metrics::{register_int_counter_vec, IntCounter}; + use metrics::{IntCounter, register_int_counter_vec}; use once_cell::sync::Lazy; use utils::shard::TenantShardId; diff --git a/pageserver/src/page_cache.rs b/pageserver/src/page_cache.rs index 45bf02362a..984dd125a9 100644 --- a/pageserver/src/page_cache.rs +++ b/pageserver/src/page_cache.rs @@ -67,23 +67,18 @@ //! mapping is automatically removed and the slot is marked free. //! 
-use std::{ - collections::{hash_map::Entry, HashMap}, - sync::{ - atomic::{AtomicU64, AtomicU8, AtomicUsize, Ordering}, - Arc, Weak, - }, - time::Duration, -}; +use std::collections::HashMap; +use std::collections::hash_map::Entry; +use std::sync::atomic::{AtomicU8, AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Weak}; +use std::time::Duration; use anyhow::Context; use once_cell::sync::OnceCell; -use crate::{ - context::RequestContext, - metrics::{page_cache_eviction_metrics, PageCacheSizeMetrics}, - virtual_file::{IoBufferMut, IoPageSlice}, -}; +use crate::context::RequestContext; +use crate::metrics::{PageCacheSizeMetrics, page_cache_eviction_metrics}; +use crate::virtual_file::{IoBufferMut, IoPageSlice}; static PAGE_CACHE: OnceCell = OnceCell::new(); const TEST_PAGE_CACHE_SIZE: usize = 50; @@ -168,11 +163,7 @@ impl Slot { let count_res = self.usage_count .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |val| { - if val == 0 { - None - } else { - Some(val - 1) - } + if val == 0 { None } else { Some(val - 1) } }); match count_res { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 668f0eee36..8972515163 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1,7 +1,15 @@ //! The Page Service listens for client connections and serves their GetPage@LSN //! requests. 
-use anyhow::{bail, Context}; +use std::borrow::Cow; +use std::num::NonZeroUsize; +use std::os::fd::AsRawFd; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; +use std::{io, str}; + +use anyhow::{Context, bail}; use async_compression::tokio::write::GzipEncoder; use bytes::Buf; use futures::FutureExt; @@ -11,72 +19,57 @@ use pageserver_api::config::{ PageServicePipeliningConfig, PageServicePipeliningConfigPipelined, PageServiceProtocolPipelinedExecutionStrategy, }; -use pageserver_api::models::{self, TenantState}; +use pageserver_api::key::rel_block_to_key; use pageserver_api::models::{ - PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, + self, PageTraceEvent, PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, PagestreamErrorResponse, PagestreamExistsRequest, PagestreamExistsResponse, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, - PagestreamProtocolVersion, PagestreamRequest, + PagestreamProtocolVersion, PagestreamRequest, TenantState, }; +use pageserver_api::reltag::SlruKind; use pageserver_api::shard::TenantShardId; use postgres_backend::{ - is_expected_io_error, AuthType, PostgresBackend, PostgresBackendReader, QueryError, + AuthType, PostgresBackend, PostgresBackendReader, QueryError, is_expected_io_error, }; +use postgres_ffi::BLCKSZ; +use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use pq_proto::framed::ConnectionError; -use pq_proto::FeStartupPacket; -use pq_proto::{BeMessage, FeMessage, RowDescriptor}; -use std::borrow::Cow; -use std::io; -use std::num::NonZeroUsize; -use std::str; -use std::str::FromStr; -use std::sync::Arc; -use std::time::SystemTime; -use std::time::{Duration, Instant}; +use pq_proto::{BeMessage, FeMessage, FeStartupPacket, RowDescriptor}; use strum_macros::IntoStaticStr; -use tokio::io::{AsyncRead, AsyncWrite}; -use 
tokio::io::{AsyncWriteExt, BufWriter}; +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt, BufWriter}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::*; +use utils::auth::{Claims, Scope, SwappableJwtAuth}; +use utils::failpoint_support; +use utils::id::{TenantId, TimelineId}; use utils::logging::log_slow; +use utils::lsn::Lsn; +use utils::simple_rcu::RcuReadGuard; use utils::sync::gate::{Gate, GateGuard}; use utils::sync::spsc_fold; -use utils::{ - auth::{Claims, Scope, SwappableJwtAuth}, - failpoint_support, - id::{TenantId, TimelineId}, - lsn::Lsn, - simple_rcu::RcuReadGuard, -}; use crate::auth::check_permission; use crate::basebackup::BasebackupError; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self, SmgrOpTimer}; -use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; +use crate::metrics::{ + self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer, +}; use crate::pgdatadir_mapping::Version; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; -use crate::task_mgr::TaskKind; -use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME}; -use crate::tenant::mgr::ShardSelector; -use crate::tenant::mgr::TenantManager; -use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult}; +use crate::span::{ + debug_assert_current_span_has_tenant_and_timeline_id, + debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id, +}; +use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME, TaskKind}; +use crate::tenant::mgr::{ + GetActiveTenantError, GetTenantError, ShardResolveResult, ShardSelector, TenantManager, +}; use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::timeline::{self, WaitLsnError}; -use crate::tenant::GetTimelineError; -use crate::tenant::PageReconstructError; 
-use crate::tenant::Timeline; +use crate::tenant::{GetTimelineError, PageReconstructError, Timeline}; use crate::{basebackup, timed_after_cancellation}; -use pageserver_api::key::rel_block_to_key; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::reltag::SlruKind; -use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; -use postgres_ffi::BLCKSZ; -use std::os::fd::AsRawFd; /// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which /// is not yet in state [`TenantState::Active`]. @@ -986,7 +979,7 @@ impl PageServerHandler { Ok(BatchedFeMessage::GetPage { span: _, shard: accum_shard, - pages: ref mut accum_pages, + pages: accum_pages, effective_request_lsn: accum_lsn, }), BatchedFeMessage::GetPage { @@ -1236,12 +1229,13 @@ impl PageServerHandler { } => { fail::fail_point!("ps::handle-pagerequest-message::exists"); ( - vec![self - .handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], + vec![ + self.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], span, ) } @@ -1253,12 +1247,13 @@ impl PageServerHandler { } => { fail::fail_point!("ps::handle-pagerequest-message::nblocks"); ( - vec![self - .handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], + vec![ + self.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], span, ) } @@ -1297,12 +1292,13 @@ impl PageServerHandler { } => { fail::fail_point!("ps::handle-pagerequest-message::dbsize"); ( - vec![self - 
.handle_db_size_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], + vec![ + self.handle_db_size_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], span, ) } @@ -1314,12 +1310,13 @@ impl PageServerHandler { } => { fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); ( - vec![self - .handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx) - .instrument(span.clone()) - .await - .map(|msg| (msg, timer)) - .map_err(|err| BatchedPageStreamError { err, req: req.hdr })], + vec![ + self.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx) + .instrument(span.clone()) + .await + .map(|msg| (msg, timer)) + .map_err(|err| BatchedPageStreamError { err, req: req.hdr }), + ], span, ) } @@ -2112,7 +2109,9 @@ impl PageServerHandler { set_tracing_field_shard_id(&timeline); if timeline.is_archived() == Some(true) { - tracing::info!("timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it."); + tracing::info!( + "timeline {tenant_id}/{timeline_id} is archived, but got basebackup request for it." + ); return Err(QueryError::NotFound("timeline is archived".into())); } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index d0e2dab042..787b1b895c 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -6,6 +6,36 @@ //! walingest.rs handles a few things like implicit relation creation and extension. //! Clarify that) //! 
+use std::collections::{BTreeMap, HashMap, HashSet, hash_map}; +use std::ops::{ControlFlow, Range}; + +use anyhow::{Context, ensure}; +use bytes::{Buf, Bytes, BytesMut}; +use enum_map::Enum; +use itertools::Itertools; +use pageserver_api::key::{ + AUX_FILES_KEY, CHECKPOINT_KEY, CONTROLFILE_KEY, CompactKey, DBDIR_KEY, Key, RelDirExists, + TWOPHASEDIR_KEY, dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, + rel_size_to_key, rel_tag_sparse_key, rel_tag_sparse_key_range, relmap_file_key, + repl_origin_key, repl_origin_key_range, slru_block_to_key, slru_dir_to_key, + slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range, +}; +use pageserver_api::keyspace::SparseKeySpace; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::shard::ShardIdentity; +use pageserver_api::value::Value; +use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; +use postgres_ffi::{BLCKSZ, Oid, RepOriginId, TimestampTz, TransactionId}; +use serde::{Deserialize, Serialize}; +use strum::IntoEnumIterator; +use tokio_util::sync::CancellationToken; +use tracing::{debug, info, trace, warn}; +use utils::bin_ser::{BeSer, DeserializeError}; +use utils::lsn::Lsn; +use utils::pausable_failpoint; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; + use super::tenant::{PageReconstructError, Timeline}; use crate::aux_file; use crate::context::RequestContext; @@ -19,37 +49,6 @@ use crate::span::{ }; use crate::tenant::storage_layer::IoConcurrency; use crate::tenant::timeline::GetVectoredError; -use anyhow::{ensure, Context}; -use bytes::{Buf, Bytes, BytesMut}; -use enum_map::Enum; -use itertools::Itertools; -use pageserver_api::key::{ - dbdir_key_range, rel_block_to_key, rel_dir_to_key, rel_key_range, rel_size_to_key, - rel_tag_sparse_key_range, relmap_file_key, repl_origin_key, repl_origin_key_range, - slru_block_to_key, slru_dir_to_key, 
slru_segment_key_range, slru_segment_size_to_key, - twophase_file_key, twophase_key_range, CompactKey, RelDirExists, AUX_FILES_KEY, CHECKPOINT_KEY, - CONTROLFILE_KEY, DBDIR_KEY, TWOPHASEDIR_KEY, -}; -use pageserver_api::key::{rel_tag_sparse_key, Key}; -use pageserver_api::keyspace::SparseKeySpace; -use pageserver_api::record::NeonWalRecord; -use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; -use pageserver_api::shard::ShardIdentity; -use pageserver_api::value::Value; -use postgres_ffi::relfile_utils::{FSM_FORKNUM, VISIBILITYMAP_FORKNUM}; -use postgres_ffi::BLCKSZ; -use postgres_ffi::{Oid, RepOriginId, TimestampTz, TransactionId}; -use serde::{Deserialize, Serialize}; -use std::collections::{hash_map, BTreeMap, HashMap, HashSet}; -use std::ops::ControlFlow; -use std::ops::Range; -use strum::IntoEnumIterator; -use tokio_util::sync::CancellationToken; -use tracing::{debug, info, trace, warn}; -use utils::bin_ser::DeserializeError; -use utils::pausable_failpoint; -use utils::{bin_ser::BeSer, lsn::Lsn}; -use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; /// Max delta records appended to the AUX_FILES_KEY (for aux v1). The write path will write a full image once this threshold is reached. 
pub const MAX_AUX_FILE_DELTAS: usize = 1024; @@ -327,16 +326,16 @@ impl Timeline { let clone = match &res { Ok(buf) => Ok(buf.clone()), Err(err) => Err(match err { - PageReconstructError::Cancelled => { - PageReconstructError::Cancelled - } + PageReconstructError::Cancelled => PageReconstructError::Cancelled, - x @ PageReconstructError::Other(_) | - x @ PageReconstructError::AncestorLsnTimeout(_) | - x @ PageReconstructError::WalRedo(_) | - x @ PageReconstructError::MissingKey(_) => { - PageReconstructError::Other(anyhow::anyhow!("there was more than one request for this key in the batch, error logged once: {x:?}")) - }, + x @ PageReconstructError::Other(_) + | x @ PageReconstructError::AncestorLsnTimeout(_) + | x @ PageReconstructError::WalRedo(_) + | x @ PageReconstructError::MissingKey(_) => { + PageReconstructError::Other(anyhow::anyhow!( + "there was more than one request for this key in the batch, error logged once: {x:?}" + )) + } }), }; @@ -355,23 +354,23 @@ impl Timeline { // this whole `match` is a lot like `From for PageReconstructError` // but without taking ownership of the GetVectoredError let err = match &err { - GetVectoredError::Cancelled => { - Err(PageReconstructError::Cancelled) - } + GetVectoredError::Cancelled => Err(PageReconstructError::Cancelled), // TODO: restructure get_vectored API to make this error per-key GetVectoredError::MissingKey(err) => { - Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more of the requested keys were missing: {err:?}"))) + Err(PageReconstructError::Other(anyhow::anyhow!( + "whole vectored get request failed because one or more of the requested keys were missing: {err:?}" + ))) } // TODO: restructure get_vectored API to make this error per-key GetVectoredError::GetReadyAncestorError(err) => { - Err(PageReconstructError::Other(anyhow::anyhow!("whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}"))) + 
Err(PageReconstructError::Other(anyhow::anyhow!( + "whole vectored get request failed because one or more key required ancestor that wasn't ready: {err:?}" + ))) } // TODO: restructure get_vectored API to make this error per-key - GetVectoredError::Other(err) => { - Err(PageReconstructError::Other( - anyhow::anyhow!("whole vectored get request failed: {err:?}"), - )) - } + GetVectoredError::Other(err) => Err(PageReconstructError::Other( + anyhow::anyhow!("whole vectored get request failed: {err:?}"), + )), // TODO: we can prevent this error class by moving this check into the type system GetVectoredError::InvalidLsn(e) => { Err(anyhow::anyhow!("invalid LSN: {e:?}").into()) @@ -379,10 +378,7 @@ impl Timeline { // NB: this should never happen in practice because we limit MAX_GET_VECTORED_KEYS // TODO: we can prevent this error class by moving this check into the type system GetVectoredError::Oversized(err) => { - Err(anyhow::anyhow!( - "batching oversized: {err:?}" - ) - .into()) + Err(anyhow::anyhow!("batching oversized: {err:?}").into()) } }; @@ -715,7 +711,10 @@ impl Timeline { { Ok(res) => res, Err(PageReconstructError::MissingKey(e)) => { - warn!("Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", e); + warn!( + "Missing key while find_lsn_for_timestamp. Either we might have already garbage-collected that data or the key is really missing. Last error: {:#}", + e + ); // Return that we didn't find any requests smaller than the LSN, and logging the error. return Ok(LsnForTimestamp::Past(min_lsn)); } @@ -2464,10 +2463,12 @@ impl DatadirModification<'_> { // modifications before ingesting DB create operations, which are the only kind that reads // data pages during ingest. 
if cfg!(debug_assertions) { - assert!(!self - .pending_data_batch - .as_ref() - .is_some_and(|b| b.updates_key(&key))); + assert!( + !self + .pending_data_batch + .as_ref() + .is_some_and(|b| b.updates_key(&key)) + ); } } @@ -2666,15 +2667,14 @@ static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]); #[cfg(test)] mod tests { use hex_literal::hex; - use pageserver_api::{models::ShardParameters, shard::ShardStripeSize}; - use utils::{ - id::TimelineId, - shard::{ShardCount, ShardNumber}, - }; + use pageserver_api::models::ShardParameters; + use pageserver_api::shard::ShardStripeSize; + use utils::id::TimelineId; + use utils::shard::{ShardCount, ShardNumber}; use super::*; - - use crate::{tenant::harness::TenantHarness, DEFAULT_PG_VERSION}; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::TenantHarness; /// Test a round trip of aux file updates, from DatadirModification to reading back from the Timeline #[tokio::test] diff --git a/pageserver/src/statvfs.rs b/pageserver/src/statvfs.rs index 4e8be58d58..85c2ed8499 100644 --- a/pageserver/src/statvfs.rs +++ b/pageserver/src/statvfs.rs @@ -73,11 +73,10 @@ impl Statvfs { pub mod mock { use camino::Utf8Path; + pub use pageserver_api::config::statvfs::mock::Behavior; use regex::Regex; use tracing::log::info; - pub use pageserver_api::config::statvfs::mock::Behavior; - pub fn get(tenants_dir: &Utf8Path, behavior: &Behavior) -> nix::Result { info!("running mocked statvfs"); @@ -85,7 +84,7 @@ pub mod mock { Behavior::Success { blocksize, total_blocks, - ref name_filter, + name_filter, } => { let used_bytes = walk_dir_disk_usage(tenants_dir, name_filter.as_deref()).unwrap(); @@ -134,7 +133,7 @@ pub mod mock { } Err(e) => { return Err(anyhow::Error::new(e) - .context(format!("get metadata of {:?}", entry.path()))) + .context(format!("get metadata of {:?}", entry.path()))); } }; total += m.len(); diff --git a/pageserver/src/task_mgr.rs b/pageserver/src/task_mgr.rs index cc93a06ccd..0b71b2cf5b 100644 
--- a/pageserver/src/task_mgr.rs +++ b/pageserver/src/task_mgr.rs @@ -40,15 +40,12 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use futures::FutureExt; +use once_cell::sync::Lazy; use pageserver_api::shard::TenantShardId; use tokio::task::JoinHandle; use tokio::task_local; use tokio_util::sync::CancellationToken; - use tracing::{debug, error, info, warn}; - -use once_cell::sync::Lazy; - use utils::env; use utils::id::TimelineId; diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 46f9c9a427..71dc3c9075 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -12,150 +12,99 @@ //! parent timeline, and the last LSN that has been written to disk. //! -use anyhow::{bail, Context}; +use std::collections::hash_map::Entry; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::fmt::{Debug, Display}; +use std::fs::File; +use std::future::Future; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::{Arc, Mutex, Weak}; +use std::time::{Duration, Instant, SystemTime}; +use std::{fmt, fs}; + +use anyhow::{Context, bail}; use arc_swap::ArcSwap; -use camino::Utf8Path; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use chrono::NaiveDateTime; use enumset::EnumSet; -use futures::stream::FuturesUnordered; use futures::StreamExt; +use futures::stream::FuturesUnordered; use itertools::Itertools as _; +use once_cell::sync::Lazy; use pageserver_api::models; -use pageserver_api::models::CompactInfoResponse; -use pageserver_api::models::LsnLease; -use pageserver_api::models::TimelineArchivalState; -use pageserver_api::models::TimelineState; -use pageserver_api::models::TopTenantShardItem; -use pageserver_api::models::WalRedoManagerStatus; -use pageserver_api::shard::ShardIdentity; -use pageserver_api::shard::ShardStripeSize; -use pageserver_api::shard::TenantShardId; -use remote_storage::DownloadError; -use remote_storage::GenericRemoteStorage; -use 
remote_storage::TimeoutOrCancel; +pub use pageserver_api::models::TenantState; +use pageserver_api::models::{ + CompactInfoResponse, LsnLease, TimelineArchivalState, TimelineState, TopTenantShardItem, + WalRedoManagerStatus, +}; +use pageserver_api::shard::{ShardIdentity, ShardStripeSize, TenantShardId}; +use remote_storage::{DownloadError, GenericRemoteStorage, TimeoutOrCancel}; use remote_timeline_client::index::GcCompactionState; use remote_timeline_client::manifest::{ - OffloadedTimelineManifest, TenantManifest, LATEST_TENANT_MANIFEST_VERSION, + LATEST_TENANT_MANIFEST_VERSION, OffloadedTimelineManifest, TenantManifest, }; -use remote_timeline_client::UploadQueueNotReadyError; -use remote_timeline_client::FAILED_REMOTE_OP_RETRIES; -use remote_timeline_client::FAILED_UPLOAD_WARN_THRESHOLD; -use secondary::heatmap::HeatMapTenant; -use secondary::heatmap::HeatMapTimeline; -use std::collections::BTreeMap; -use std::fmt; -use std::future::Future; -use std::sync::atomic::AtomicBool; -use std::sync::Weak; -use std::time::SystemTime; +use remote_timeline_client::{ + FAILED_REMOTE_OP_RETRIES, FAILED_UPLOAD_WARN_THRESHOLD, UploadQueueNotReadyError, +}; +use secondary::heatmap::{HeatMapTenant, HeatMapTimeline}; use storage_broker::BrokerClientChannel; -use timeline::compaction::CompactionOutcome; -use timeline::compaction::GcCompactionQueue; -use timeline::import_pgdata; -use timeline::offload::offload_timeline; -use timeline::offload::OffloadError; -use timeline::CompactFlags; -use timeline::CompactOptions; -use timeline::CompactionError; -use timeline::PreviousHeatmap; -use timeline::ShutdownMode; +use timeline::compaction::{CompactionOutcome, GcCompactionQueue}; +use timeline::offload::{OffloadError, offload_timeline}; +use timeline::{ + CompactFlags, CompactOptions, CompactionError, PreviousHeatmap, ShutdownMode, import_pgdata, +}; use tokio::io::BufReader; -use tokio::sync::watch; -use tokio::sync::Notify; +use tokio::sync::{Notify, Semaphore, watch}; use 
tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; use upload_queue::NotInitialized; -use utils::backoff; use utils::circuit_breaker::CircuitBreaker; -use utils::completion; use utils::crashsafe::path_with_suffix_extension; -use utils::failpoint_support; -use utils::fs_ext; -use utils::pausable_failpoint; -use utils::sync::gate::Gate; -use utils::sync::gate::GateGuard; -use utils::timeout::timeout_cancellable; -use utils::timeout::TimeoutCancellableError; +use utils::sync::gate::{Gate, GateGuard}; +use utils::timeout::{TimeoutCancellableError, timeout_cancellable}; use utils::try_rcu::ArcSwapExt; -use utils::zstd::create_zst_tarball; -use utils::zstd::extract_zst_tarball; +use utils::zstd::{create_zst_tarball, extract_zst_tarball}; +use utils::{backoff, completion, failpoint_support, fs_ext, pausable_failpoint}; -use self::config::AttachedLocationConfig; -use self::config::AttachmentMode; -use self::config::LocationConf; -use self::config::TenantConf; +use self::config::{AttachedLocationConfig, AttachmentMode, LocationConf, TenantConf}; use self::metadata::TimelineMetadata; -use self::mgr::GetActiveTenantError; -use self::mgr::GetTenantError; +use self::mgr::{GetActiveTenantError, GetTenantError}; use self::remote_timeline_client::upload::{upload_index_part, upload_tenant_manifest}; use self::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError}; -use self::timeline::uninit::TimelineCreateGuard; -use self::timeline::uninit::TimelineExclusionError; -use self::timeline::uninit::UninitializedTimeline; -use self::timeline::EvictionTaskTenantState; -use self::timeline::GcCutoffs; -use self::timeline::TimelineDeleteProgress; -use self::timeline::TimelineResources; -use self::timeline::WaitLsnError; +use self::timeline::uninit::{TimelineCreateGuard, TimelineExclusionError, UninitializedTimeline}; +use self::timeline::{ + EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError, +}; use 
crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; -use crate::deletion_queue::DeletionQueueClient; -use crate::deletion_queue::DeletionQueueError; -use crate::import_datadir; +use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::l0_flush::L0FlushGlobalState; -use crate::metrics::CONCURRENT_INITDBS; -use crate::metrics::INITDB_RUN_TIME; -use crate::metrics::INITDB_SEMAPHORE_ACQUISITION_TIME; -use crate::metrics::TENANT; use crate::metrics::{ - remove_tenant_metrics, BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, - TENANT_STATE_METRIC, TENANT_SYNTHETIC_SIZE_METRIC, + BROKEN_TENANTS_SET, CIRCUIT_BREAKERS_BROKEN, CIRCUIT_BREAKERS_UNBROKEN, CONCURRENT_INITDBS, + INITDB_RUN_TIME, INITDB_SEMAPHORE_ACQUISITION_TIME, TENANT, TENANT_STATE_METRIC, + TENANT_SYNTHETIC_SIZE_METRIC, remove_tenant_metrics, }; -use crate::task_mgr; use crate::task_mgr::TaskKind; -use crate::tenant::config::LocationMode; -use crate::tenant::config::TenantConfOpt; +use crate::tenant::config::{LocationMode, TenantConfOpt}; use crate::tenant::gc_result::GcResult; pub use crate::tenant::remote_timeline_client::index::IndexPart; -use crate::tenant::remote_timeline_client::remote_initdb_archive_path; -use crate::tenant::remote_timeline_client::MaybeDeletedIndexPart; -use crate::tenant::remote_timeline_client::INITDB_PATH; -use crate::tenant::storage_layer::DeltaLayer; -use crate::tenant::storage_layer::ImageLayer; -use crate::walingest::WalLagCooldown; -use crate::walredo; -use crate::InitializationOrder; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::collections::HashSet; -use std::fmt::Debug; -use std::fmt::Display; -use std::fs; -use std::fs::File; -use std::sync::atomic::{AtomicU64, Ordering}; -use std::sync::Arc; -use std::sync::Mutex; -use std::time::{Duration, Instant}; - -use crate::span; +use crate::tenant::remote_timeline_client::{ + INITDB_PATH, MaybeDeletedIndexPart, 
remote_initdb_archive_path, +}; +use crate::tenant::storage_layer::{DeltaLayer, ImageLayer}; use crate::tenant::timeline::delete::DeleteTimelineFlow; use crate::tenant::timeline::uninit::cleanup_timeline_directory; use crate::virtual_file::VirtualFile; +use crate::walingest::WalLagCooldown; use crate::walredo::PostgresRedoManager; -use crate::TEMP_FILE_SUFFIX; -use once_cell::sync::Lazy; -pub use pageserver_api::models::TenantState; -use tokio::sync::Semaphore; +use crate::{InitializationOrder, TEMP_FILE_SUFFIX, import_datadir, span, task_mgr, walredo}; static INIT_DB_SEMAPHORE: Lazy = Lazy::new(|| Semaphore::new(8)); -use utils::{ - crashsafe, - generation::Generation, - id::TimelineId, - lsn::{Lsn, RecordLsn}, -}; +use utils::crashsafe; +use utils::generation::Generation; +use utils::id::TimelineId; +use utils::lsn::{Lsn, RecordLsn}; pub mod blob_io; pub mod block_io; @@ -184,9 +133,9 @@ mod gc_block; mod gc_result; pub(crate) mod throttle; -pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; pub(crate) use timeline::{LogicalSizeCalculationCause, PageReconstructError, Timeline}; +pub(crate) use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; // re-export for use in walreceiver pub use crate::tenant::timeline::WalReceiverInfo; @@ -251,7 +200,9 @@ impl AttachedTenantConf { Ok(Self::new(location_conf.tenant_conf, *attach_conf)) } LocationMode::Secondary(_) => { - anyhow::bail!("Attempted to construct AttachedTenantConf from a LocationConf in secondary mode") + anyhow::bail!( + "Attempted to construct AttachedTenantConf from a LocationConf in secondary mode" + ) } } } @@ -465,7 +416,9 @@ impl WalredoManagerId { static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1); let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); if id == 0 { - panic!("WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique"); + panic!( + "WalredoManagerId::new() returned 0, 
indicating wraparound, risking it's no longer unique" + ); } Self(id) } @@ -1229,7 +1182,9 @@ impl Tenant { match cause { LoadTimelineCause::Attach | LoadTimelineCause::Unoffload => (), LoadTimelineCause::ImportPgdata { .. } => { - unreachable!("ImportPgdata should not be reloading timeline import is done and persisted as such in s3") + unreachable!( + "ImportPgdata should not be reloading timeline import is done and persisted as such in s3" + ) } } let mut guard = self.timelines_creating.lock().unwrap(); @@ -1262,8 +1217,8 @@ impl Tenant { // We should never try and load the same timeline twice during startup Entry::Occupied(_) => { unreachable!( - "Timeline {tenant_id}/{timeline_id} already exists in the tenant map" - ); + "Timeline {tenant_id}/{timeline_id} already exists in the tenant map" + ); } Entry::Vacant(v) => { v.insert(Arc::clone(&timeline)); @@ -1657,7 +1612,9 @@ impl Tenant { failpoint_support::sleep_millis_async!("before-attaching-tenant"); let Some(preload) = preload else { - anyhow::bail!("local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624"); + anyhow::bail!( + "local-only deployment is no longer supported, https://github.com/neondatabase/neon/issues/5624" + ); }; let mut offloaded_timeline_ids = HashSet::new(); @@ -2041,7 +1998,7 @@ impl Tenant { remote_storage: GenericRemoteStorage, previous_heatmap: Option, cancel: CancellationToken, - ) -> impl Future { + ) -> impl Future + use<> { let client = self.build_timeline_client(timeline_id, remote_storage); async move { debug_assert_current_span_has_tenant_and_timeline_id(); @@ -2736,7 +2693,9 @@ impl Tenant { timeline } CreateTimelineResult::ImportSpawned(timeline) => { - info!("import task spawned, timeline will become visible and activated once the import is done"); + info!( + "import task spawned, timeline will become visible and activated once the import is done" + ); timeline } }; @@ -2782,7 +2741,7 @@ impl Tenant { { 
StartCreatingTimelineResult::CreateGuard(guard) => guard, StartCreatingTimelineResult::Idempotent(timeline) => { - return Ok(CreateTimelineResult::Idempotent(timeline)) + return Ok(CreateTimelineResult::Idempotent(timeline)); } }; @@ -2916,7 +2875,9 @@ impl Tenant { let index_part = match index_part { MaybeDeletedIndexPart::Deleted(_) => { // likely concurrent delete call, cplane should prevent this - anyhow::bail!("index part says deleted but we are not done creating yet, this should not happen but") + anyhow::bail!( + "index part says deleted but we are not done creating yet, this should not happen but" + ) } MaybeDeletedIndexPart::IndexPart(p) => p, }; @@ -3907,7 +3868,9 @@ where if !later.is_empty() { for (missing_id, orphan_ids) in later { for (orphan_id, _) in orphan_ids { - error!("could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded"); + error!( + "could not load timeline {orphan_id} because its ancestor timeline {missing_id} could not be loaded" + ); } } bail!("could not load tenant because some timelines are missing ancestors"); @@ -4827,7 +4790,10 @@ impl Tenant { let gc_info = src_timeline.gc_info.read().unwrap(); let planned_cutoff = gc_info.min_cutoff(); if gc_info.lsn_covered_by_lease(start_lsn) { - tracing::info!("skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", *applied_gc_cutoff_lsn); + tracing::info!( + "skipping comparison of {start_lsn} with gc cutoff {} and planned gc cutoff {planned_cutoff} due to lsn lease", + *applied_gc_cutoff_lsn + ); } else { src_timeline .check_lsn_is_in_scope(start_lsn, &applied_gc_cutoff_lsn) @@ -4973,7 +4939,9 @@ impl Tenant { } // Idempotent <=> CreateTimelineIdempotency is identical (x, y) if x == y => { - info!("timeline already exists and idempotency matches, succeeding request"); + info!( + "timeline already exists and idempotency matches, succeeding request" + ); // fallthrough } (_, _) => { @@ -5055,7 
+5023,7 @@ impl Tenant { { StartCreatingTimelineResult::CreateGuard(guard) => guard, StartCreatingTimelineResult::Idempotent(timeline) => { - return Ok(CreateTimelineResult::Idempotent(timeline)) + return Ok(CreateTimelineResult::Idempotent(timeline)); } }; @@ -5260,7 +5228,9 @@ impl Tenant { .create_timeline_files(&create_guard.timeline_path) .await { - error!("Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}"); + error!( + "Failed to create initial files for timeline {tenant_shard_id}/{new_timeline_id}, cleaning up: {e:?}" + ); cleanup_timeline_directory(create_guard); return Err(e); } @@ -5625,20 +5595,19 @@ pub async fn dump_layerfile_from_path( #[cfg(test)] pub(crate) mod harness { use bytes::{Bytes, BytesMut}; + use hex_literal::hex; use once_cell::sync::OnceCell; + use pageserver_api::key::Key; use pageserver_api::models::ShardParameters; + use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::ShardIndex; + use utils::id::TenantId; use utils::logging; + use super::*; use crate::deletion_queue::mock::MockDeletionQueue; use crate::l0_flush::L0FlushConfig; use crate::walredo::apply_neon; - use pageserver_api::key::Key; - use pageserver_api::record::NeonWalRecord; - - use super::*; - use hex_literal::hex; - use utils::id::TenantId; pub const TIMELINE_ID: TimelineId = TimelineId::from_array(hex!("11223344556677881122334455667788")); @@ -5919,34 +5888,34 @@ pub(crate) mod harness { mod tests { use std::collections::{BTreeMap, BTreeSet}; - use super::*; - use crate::keyspace::KeySpaceAccum; - use crate::tenant::harness::*; - use crate::tenant::timeline::CompactFlags; - use crate::DEFAULT_PG_VERSION; use bytes::{Bytes, BytesMut}; use hex_literal::hex; use itertools::Itertools; - use pageserver_api::key::{Key, AUX_KEY_PREFIX, NON_INHERITED_RANGE, RELATION_SIZE_PREFIX}; + #[cfg(feature = "testing")] + use models::CompactLsnRange; + use pageserver_api::key::{AUX_KEY_PREFIX, Key, NON_INHERITED_RANGE, 
RELATION_SIZE_PREFIX}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::{CompactionAlgorithm, CompactionAlgorithmSettings}; + #[cfg(feature = "testing")] + use pageserver_api::record::NeonWalRecord; use pageserver_api::value::Value; use pageserver_compaction::helpers::overlaps_with; - use rand::{thread_rng, Rng}; + use rand::{Rng, thread_rng}; use storage_layer::{IoConcurrency, PersistentLayerKey}; use tests::storage_layer::ValuesReconstructState; use tests::timeline::{GetVectoredError, ShutdownMode}; + #[cfg(feature = "testing")] + use timeline::GcInfo; + #[cfg(feature = "testing")] + use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; - #[cfg(feature = "testing")] - use models::CompactLsnRange; - #[cfg(feature = "testing")] - use pageserver_api::record::NeonWalRecord; - #[cfg(feature = "testing")] - use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; - #[cfg(feature = "testing")] - use timeline::GcInfo; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::keyspace::KeySpaceAccum; + use crate::tenant::harness::*; + use crate::tenant::timeline::CompactFlags; static TEST_KEY: Lazy = Lazy::new(|| Key::from_slice(&hex!("010000000033333333444444445500000001"))); @@ -6196,11 +6165,12 @@ mod tests { panic!("wrong error type") }; assert!(err.to_string().contains("invalid branch start lsn")); - assert!(err - .source() - .unwrap() - .to_string() - .contains("we might've already garbage collected needed data")) + assert!( + err.source() + .unwrap() + .to_string() + .contains("we might've already garbage collected needed data") + ) } } @@ -6229,11 +6199,12 @@ mod tests { panic!("wrong error type"); }; assert!(&err.to_string().contains("invalid branch start lsn")); - assert!(&err - .source() - .unwrap() - .to_string() - .contains("is earlier than latest GC cutoff")); + assert!( + &err.source() + .unwrap() + .to_string() + .contains("is earlier than 
latest GC cutoff") + ); } } @@ -7542,10 +7513,12 @@ mod tests { } } - assert!(!harness - .conf - .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID) - .exists()); + assert!( + !harness + .conf + .timeline_path(&tenant.tenant_shard_id, &TIMELINE_ID) + .exists() + ); Ok(()) } @@ -7746,7 +7719,10 @@ mod tests { let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len(); - assert!(after_num_l0_delta_files < before_num_l0_delta_files, "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}"); + assert!( + after_num_l0_delta_files < before_num_l0_delta_files, + "after_num_l0_delta_files={after_num_l0_delta_files}, before_num_l0_delta_files={before_num_l0_delta_files}" + ); assert_eq!( tline.get(test_key, lsn, &ctx).await?, @@ -7913,7 +7889,10 @@ mod tests { let (_, after_delta_file_accessed) = scan_with_statistics(&tline, &keyspace, lsn, &ctx, io_concurrency.clone()) .await?; - assert!(after_delta_file_accessed < before_delta_file_accessed, "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}"); + assert!( + after_delta_file_accessed < before_delta_file_accessed, + "after_delta_file_accessed={after_delta_file_accessed}, before_delta_file_accessed={before_delta_file_accessed}" + ); // Given that we already produced an image layer, there should be no delta layer needed for the scan, but still setting a low threshold there for unforeseen circumstances. 
assert!( after_delta_file_accessed <= 2, @@ -7967,10 +7946,12 @@ mod tests { get_vectored_impl_wrapper(&tline, base_key, lsn, &ctx).await?, Some(test_img("data key 1")) ); - assert!(get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx) - .await - .unwrap_err() - .is_missing_key_error()); + assert!( + get_vectored_impl_wrapper(&tline, base_key_child, lsn, &ctx) + .await + .unwrap_err() + .is_missing_key_error() + ); assert!( get_vectored_impl_wrapper(&tline, base_key_nonexist, lsn, &ctx) .await diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index 7b55df52a5..b16a88eaa4 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -14,6 +14,9 @@ //! len < 128: 0XXXXXXX //! len >= 128: 1CCCXXXX XXXXXXXX XXXXXXXX XXXXXXXX //! +use std::cmp::min; +use std::io::{Error, ErrorKind}; + use async_compression::Level; use bytes::{BufMut, BytesMut}; use pageserver_api::models::ImageCompressionAlgorithm; @@ -24,10 +27,8 @@ use tracing::warn; use crate::context::RequestContext; use crate::page_cache::PAGE_SZ; use crate::tenant::block_io::BlockCursor; -use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; use crate::virtual_file::VirtualFile; -use std::cmp::min; -use std::io::{Error, ErrorKind}; +use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; #[derive(Copy, Clone, Debug)] pub struct CompressionInfo { @@ -414,12 +415,15 @@ impl BlobWriter { #[cfg(test)] pub(crate) mod tests { - use super::*; - use crate::{context::DownloadBehavior, task_mgr::TaskKind, tenant::block_io::BlockReaderRef}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use rand::{Rng, SeedableRng}; + use super::*; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::tenant::block_io::BlockReaderRef; + async fn round_trip_test(blobs: &[Vec]) -> Result<(), Error> { round_trip_test_compressed::(blobs, false).await } @@ -486,7 +490,7 @@ pub(crate) mod tests { 
pub(crate) fn random_array(len: usize) -> Vec { let mut rng = rand::thread_rng(); - (0..len).map(|_| rng.gen()).collect::<_>() + (0..len).map(|_| rng.r#gen()).collect::<_>() } #[tokio::test] @@ -544,9 +548,9 @@ pub(crate) mod tests { let mut rng = rand::rngs::StdRng::seed_from_u64(42); let blobs = (0..1024) .map(|_| { - let mut sz: u16 = rng.gen(); + let mut sz: u16 = rng.r#gen(); // Make 50% of the arrays small - if rng.gen() { + if rng.r#gen() { sz &= 63; } random_array(sz.into()) diff --git a/pageserver/src/tenant/block_io.rs b/pageserver/src/tenant/block_io.rs index 990211f80a..66c586daff 100644 --- a/pageserver/src/tenant/block_io.rs +++ b/pageserver/src/tenant/block_io.rs @@ -2,14 +2,16 @@ //! Low-level Block-oriented I/O functions //! +use std::ops::Deref; + +use bytes::Bytes; + use super::storage_layer::delta_layer::{Adapter, DeltaLayerInner}; use crate::context::RequestContext; -use crate::page_cache::{self, FileId, PageReadGuard, PageWriteGuard, ReadBufResult, PAGE_SZ}; +use crate::page_cache::{self, FileId, PAGE_SZ, PageReadGuard, PageWriteGuard, ReadBufResult}; #[cfg(test)] use crate::virtual_file::IoBufferMut; use crate::virtual_file::VirtualFile; -use bytes::Bytes; -use std::ops::Deref; /// This is implemented by anything that can read 8 kB (PAGE_SZ) /// blocks, using the page cache diff --git a/pageserver/src/tenant/checks.rs b/pageserver/src/tenant/checks.rs index f98356242e..d5b979ab2a 100644 --- a/pageserver/src/tenant/checks.rs +++ b/pageserver/src/tenant/checks.rs @@ -63,9 +63,9 @@ pub fn check_valid_layermap(metadata: &[LayerName]) -> Option { && overlaps_with(&layer.key_range, &other_layer.key_range) { let err = format!( - "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", - layer, other_layer - ); + "layer violates the layer map LSN split assumption: layer {} intersects with layer {}", + layer, other_layer + ); return Some(err); } } diff --git a/pageserver/src/tenant/config.rs 
b/pageserver/src/tenant/config.rs index ab4c4c935d..334fb04604 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -8,16 +8,17 @@ //! We cannot use global or default config instead, because wrong settings //! may lead to a data loss. //! +use std::num::NonZeroU64; +use std::time::Duration; + pub(crate) use pageserver_api::config::TenantConfigToml as TenantConf; -use pageserver_api::models::CompactionAlgorithmSettings; -use pageserver_api::models::EvictionPolicy; -use pageserver_api::models::{self, TenantConfigPatch}; +use pageserver_api::models::{ + self, CompactionAlgorithmSettings, EvictionPolicy, TenantConfigPatch, +}; use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; use serde::de::IntoDeserializer; use serde::{Deserialize, Serialize}; use serde_json::Value; -use std::num::NonZeroU64; -use std::time::Duration; use utils::generation::Generation; use utils::postgres_client::PostgresClientProtocol; @@ -739,9 +740,10 @@ impl From for models::TenantConfig { #[cfg(test)] mod tests { - use super::*; use models::TenantConfig; + use super::*; + #[test] fn de_serializing_pageserver_config_omits_empty_values() { let small_conf = TenantConfOpt { diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index bb9df020b5..73c105b34e 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -18,27 +18,23 @@ //! - An Iterator interface would be more convenient for the callers than the //! 'visit' function //! 
+use std::cmp::Ordering; +use std::iter::Rev; +use std::ops::{Range, RangeInclusive}; +use std::{io, result}; + use async_stream::try_stream; -use byteorder::{ReadBytesExt, BE}; +use byteorder::{BE, ReadBytesExt}; use bytes::{BufMut, Bytes, BytesMut}; use either::Either; use futures::{Stream, StreamExt}; use hex; -use std::{ - cmp::Ordering, - io, - iter::Rev, - ops::{Range, RangeInclusive}, - result, -}; use thiserror::Error; use tracing::error; -use crate::{ - context::{DownloadBehavior, RequestContext}, - task_mgr::TaskKind, - tenant::block_io::{BlockReader, BlockWriter}, -}; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::TaskKind; +use crate::tenant::block_io::{BlockReader, BlockWriter}; // The maximum size of a value stored in the B-tree. 5 bytes is enough currently. pub const VALUE_SZ: usize = 5; @@ -833,12 +829,14 @@ impl BuildNode { #[cfg(test)] pub(crate) mod tests { - use super::*; - use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef}; - use rand::Rng; use std::collections::BTreeMap; use std::sync::atomic::{AtomicUsize, Ordering}; + use rand::Rng; + + use super::*; + use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef}; + #[derive(Clone, Default)] pub(crate) struct TestDisk { blocks: Vec, @@ -1115,7 +1113,7 @@ pub(crate) mod tests { // Test get() operations on random keys, most of which will not exist for _ in 0..100000 { - let key_int = rand::thread_rng().gen::(); + let key_int = rand::thread_rng().r#gen::(); let search_key = u128::to_be_bytes(key_int); assert!(reader.get(&search_key, &ctx).await? == all_data.get(&key_int).cloned()); } diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index ba79672bc7..cb25fa6185 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -1,6 +1,17 @@ //! Implementation of append-only file data structure //! used to keep in-memory layers spilled on disk. 
+use std::io; +use std::sync::Arc; +use std::sync::atomic::AtomicU64; + +use camino::Utf8PathBuf; +use num_traits::Num; +use pageserver_api::shard::TenantShardId; +use tokio_epoll_uring::{BoundedBuf, Slice}; +use tracing::error; +use utils::id::TimelineId; + use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}; use crate::config::PageServerConf; use crate::context::RequestContext; @@ -9,17 +20,7 @@ use crate::tenant::storage_layer::inmemory_layer::vectored_dio_read::File; use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut; use crate::virtual_file::owned_buffers_io::slice::SliceMutExt; use crate::virtual_file::owned_buffers_io::write::Buffer; -use crate::virtual_file::{self, owned_buffers_io, IoBufferMut, VirtualFile}; -use camino::Utf8PathBuf; -use num_traits::Num; -use pageserver_api::shard::TenantShardId; -use tokio_epoll_uring::{BoundedBuf, Slice}; -use tracing::error; - -use std::io; -use std::sync::atomic::AtomicU64; -use std::sync::Arc; -use utils::id::TimelineId; +use crate::virtual_file::{self, IoBufferMut, VirtualFile, owned_buffers_io}; pub struct EphemeralFile { _tenant_shard_id: TenantShardId, @@ -319,13 +320,14 @@ pub fn is_ephemeral_file(filename: &str) -> bool { #[cfg(test)] mod tests { + use std::fs; + use std::str::FromStr; + use rand::Rng; use super::*; use crate::context::DownloadBehavior; use crate::task_mgr::TaskKind; - use std::fs; - use std::str::FromStr; fn harness( test_name: &str, diff --git a/pageserver/src/tenant/gc_block.rs b/pageserver/src/tenant/gc_block.rs index af73acb2be..7aa920c953 100644 --- a/pageserver/src/tenant/gc_block.rs +++ b/pageserver/src/tenant/gc_block.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use utils::id::TimelineId; diff --git a/pageserver/src/tenant/gc_result.rs b/pageserver/src/tenant/gc_result.rs index c805aafeab..7a7d6d19cb 100644 --- a/pageserver/src/tenant/gc_result.rs +++ 
b/pageserver/src/tenant/gc_result.rs @@ -1,8 +1,9 @@ -use anyhow::Result; -use serde::Serialize; use std::ops::AddAssign; use std::time::Duration; +use anyhow::Result; +use serde::Serialize; + /// /// Result of performing GC /// diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index a69cce932e..59f5a6bd90 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -46,24 +46,24 @@ mod historic_layer_coverage; mod layer_coverage; -use crate::context::RequestContext; -use crate::keyspace::KeyPartitioning; -use crate::tenant::storage_layer::InMemoryLayer; -use anyhow::Result; -use pageserver_api::key::Key; -use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; -use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use std::collections::{HashMap, VecDeque}; use std::iter::Peekable; use std::ops::Range; use std::sync::Arc; + +use anyhow::Result; +use historic_layer_coverage::BufferedHistoricLayerCoverage; +pub use historic_layer_coverage::LayerKey; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; +use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use tokio::sync::watch; use utils::lsn::Lsn; -use historic_layer_coverage::BufferedHistoricLayerCoverage; -pub use historic_layer_coverage::LayerKey; - use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc}; +use crate::context::RequestContext; +use crate::keyspace::KeyPartitioning; +use crate::tenant::storage_layer::InMemoryLayer; /// /// LayerMap tracks what layers exist on a timeline. 
@@ -1066,18 +1066,17 @@ impl LayerMap { #[cfg(test)] mod tests { - use crate::tenant::{storage_layer::LayerName, IndexPart}; - use pageserver_api::{ - key::DBDIR_KEY, - keyspace::{KeySpace, KeySpaceRandomAccum}, - }; - use std::{collections::HashMap, path::PathBuf}; - use utils::{ - id::{TenantId, TimelineId}, - shard::TenantShardId, - }; + use std::collections::HashMap; + use std::path::PathBuf; + + use pageserver_api::key::DBDIR_KEY; + use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; + use utils::id::{TenantId, TimelineId}; + use utils::shard::TenantShardId; use super::*; + use crate::tenant::IndexPart; + use crate::tenant::storage_layer::LayerName; #[derive(Clone)] struct LayerDesc { @@ -1417,9 +1416,11 @@ mod tests { assert!(!shadow.ranges.is_empty()); // At least some layers should be marked covered - assert!(layer_visibilities - .iter() - .any(|i| matches!(i.1, LayerVisibilityHint::Covered))); + assert!( + layer_visibilities + .iter() + .any(|i| matches!(i.1, LayerVisibilityHint::Covered)) + ); let layer_visibilities = layer_visibilities.into_iter().collect::>(); diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index 136f68bc36..f8bec48886 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -3,9 +3,8 @@ use std::ops::Range; use tracing::info; -use crate::tenant::storage_layer::PersistentLayerDesc; - use super::layer_coverage::LayerCoverageTuple; +use crate::tenant::storage_layer::PersistentLayerDesc; /// Layers in this module are identified and indexed by this data. 
/// diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 15c6955260..77f9a3579d 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -19,8 +19,9 @@ use anyhow::ensure; use serde::{Deserialize, Serialize}; -use utils::bin_ser::SerializeError; -use utils::{bin_ser::BeSer, id::TimelineId, lsn::Lsn}; +use utils::bin_ser::{BeSer, SerializeError}; +use utils::id::TimelineId; +use utils::lsn::Lsn; /// Use special format number to enable backward compatibility. const METADATA_FORMAT_VERSION: u16 = 4; @@ -345,9 +346,10 @@ impl TimelineMetadata { } pub(crate) mod modern_serde { - use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader}; use serde::{Deserialize, Serialize}; + use super::{TimelineMetadata, TimelineMetadataBodyV2, TimelineMetadataHeader}; + pub(crate) fn deserialize<'de, D>(deserializer: D) -> Result where D: serde::de::Deserializer<'de>, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 22ee560dbf..003f84e640 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1,34 +1,42 @@ //! This module acts as a switchboard to access different repositories managed by this //! page server. 
-use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf}; -use futures::StreamExt; -use itertools::Itertools; -use pageserver_api::key::Key; -use pageserver_api::models::LocationConfigMode; -use pageserver_api::shard::{ - ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId, -}; -use pageserver_api::upcall_api::ReAttachResponseTenant; -use rand::{distributions::Alphanumeric, Rng}; -use remote_storage::TimeoutOrCancel; use std::borrow::Cow; use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; use std::ops::Deref; use std::sync::Arc; use std::time::Duration; -use sysinfo::SystemExt; -use tokio::fs; use anyhow::Context; +use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use itertools::Itertools; use once_cell::sync::Lazy; +use pageserver_api::key::Key; +use pageserver_api::models::LocationConfigMode; +use pageserver_api::shard::{ + ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId, +}; +use pageserver_api::upcall_api::ReAttachResponseTenant; +use rand::Rng; +use rand::distributions::Alphanumeric; +use remote_storage::TimeoutOrCancel; +use sysinfo::SystemExt; +use tokio::fs; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::*; - +use utils::crashsafe::path_with_suffix_extension; +use utils::fs_ext::PathExt; +use utils::generation::Generation; +use utils::id::{TenantId, TimelineId}; use utils::{backoff, completion, crashsafe}; +use super::remote_timeline_client::remote_tenant_path; +use super::secondary::SecondaryTenant; +use super::timeline::detach_ancestor::{self, PreparedTimelineDetach}; +use super::{GlobalShutDown, TenantSharedResources}; use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::controller_upcall_client::{ @@ -37,7 +45,7 @@ use crate::controller_upcall_client::{ use crate::deletion_queue::DeletionQueueClient; use crate::http::routes::ACTIVE_TENANT_TIMEOUT; use 
crate::metrics::{TENANT, TENANT_MANAGER as METRICS}; -use crate::task_mgr::{TaskKind, BACKGROUND_RUNTIME}; +use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind}; use crate::tenant::config::{ AttachedLocationConfig, AttachmentMode, LocationConf, LocationMode, SecondaryLocationConfig, }; @@ -48,16 +56,6 @@ use crate::tenant::{AttachedTenantConf, GcError, LoadConfigError, SpawnMode, Ten use crate::virtual_file::MaybeFatalIo; use crate::{InitializationOrder, TEMP_FILE_SUFFIX}; -use utils::crashsafe::path_with_suffix_extension; -use utils::fs_ext::PathExt; -use utils::generation::Generation; -use utils::id::{TenantId, TimelineId}; - -use super::remote_timeline_client::remote_tenant_path; -use super::secondary::SecondaryTenant; -use super::timeline::detach_ancestor::{self, PreparedTimelineDetach}; -use super::{GlobalShutDown, TenantSharedResources}; - /// For a tenant that appears in TenantsMap, it may either be /// - `Attached`: has a full Tenant object, is elegible to service /// reads and ingest WAL. @@ -140,7 +138,7 @@ impl TenantStartupMode { /// If this returns None, the re-attach struct is in an invalid state and /// should be ignored in the response. 
fn from_reattach_tenant(rart: ReAttachResponseTenant) -> Option { - match (rart.mode, rart.gen) { + match (rart.mode, rart.r#gen) { (LocationConfigMode::Detached, _) => None, (LocationConfigMode::Secondary, _) => Some(Self::Secondary), (LocationConfigMode::AttachedMulti, Some(g)) => { @@ -376,7 +374,7 @@ async fn init_load_generations( TenantStartupMode::Attached((_mode, generation)) => Some(generation), TenantStartupMode::Secondary => None, } - .map(|gen| (*id, *gen)) + .map(|gen_| (*id, *gen_)) }) .collect(); resources.deletion_queue_client.recover(attached_tenants)?; @@ -502,7 +500,9 @@ pub async fn init_tenant_mgr( .total_memory(); let max_ephemeral_layer_bytes = conf.ephemeral_bytes_per_memory_kb as u64 * (system_memory / 1024); - tracing::info!("Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory"); + tracing::info!( + "Initialized ephemeral layer size limit to {max_ephemeral_layer_bytes}, for {system_memory} bytes of memory" + ); inmemory_layer::GLOBAL_RESOURCES.max_dirty_bytes.store( max_ephemeral_layer_bytes, std::sync::atomic::Ordering::Relaxed, @@ -700,10 +700,11 @@ fn tenant_spawn( // to avoid impacting prod runtime performance. 
assert!(!crate::is_temporary(tenant_path)); debug_assert!(tenant_path.is_dir()); - debug_assert!(conf - .tenant_location_config_path(&tenant_shard_id) - .try_exists() - .unwrap()); + debug_assert!( + conf.tenant_location_config_path(&tenant_shard_id) + .try_exists() + .unwrap() + ); Tenant::spawn( conf, @@ -791,7 +792,9 @@ async fn shutdown_all_tenants0(tenants: &std::sync::RwLock) { (total_in_progress, total_attached) } TenantsMap::ShuttingDown(_) => { - error!("already shutting down, this function isn't supposed to be called more than once"); + error!( + "already shutting down, this function isn't supposed to be called more than once" + ); return; } } @@ -1016,9 +1019,9 @@ impl TenantManager { Ok(Ok(_)) => return Ok(Some(tenant)), Err(_) => { tracing::warn!( - timeout_ms = flush_timeout.as_millis(), - "Timed out waiting for flush to remote storage, proceeding anyway." - ) + timeout_ms = flush_timeout.as_millis(), + "Timed out waiting for flush to remote storage, proceeding anyway." + ) } } } @@ -1194,7 +1197,9 @@ impl TenantManager { } TenantSlot::Attached(tenant) => { let (_guard, progress) = utils::completion::channel(); - info!("Shutting down just-spawned tenant, because tenant manager is shut down"); + info!( + "Shutting down just-spawned tenant, because tenant manager is shut down" + ); match tenant.shutdown(progress, ShutdownMode::Hard).await { Ok(()) => { info!("Finished shutting down just-spawned tenant"); @@ -1784,7 +1789,7 @@ impl TenantManager { _ => { return Err(anyhow::anyhow!(e).context(format!( "Hard linking {relative_layer} into {child_prefix}" - ))) + ))); } } } @@ -2025,8 +2030,8 @@ impl TenantManager { .wait_to_become_active(std::time::Duration::from_secs(9999)) .await .map_err(|e| { - use pageserver_api::models::TenantState; use GetActiveTenantError::{Cancelled, WillNotBecomeActive}; + use pageserver_api::models::TenantState; match e { Cancelled | WillNotBecomeActive(TenantState::Stopping { .. 
}) => { Error::ShuttingDown @@ -2089,7 +2094,7 @@ impl TenantManager { match selector { ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => { - return ShardResolveResult::Found(tenant.clone()) + return ShardResolveResult::Found(tenant.clone()); } ShardSelector::Page(key) => { // First slot we see for this tenant, calculate the expected shard number @@ -2486,7 +2491,7 @@ impl SlotGuard { TenantsMap::Initializing => { return Err(TenantSlotUpsertError::MapState( TenantMapError::StillInitializing, - )) + )); } TenantsMap::ShuttingDown(_) => { return Err(TenantSlotUpsertError::ShuttingDown(( @@ -2815,21 +2820,22 @@ where } } -use { - crate::tenant::gc_result::GcResult, http_utils::error::ApiError, - pageserver_api::models::TimelineGcRequest, -}; +use http_utils::error::ApiError; +use pageserver_api::models::TimelineGcRequest; + +use crate::tenant::gc_result::GcResult; #[cfg(test)] mod tests { use std::collections::BTreeMap; use std::sync::Arc; + use tracing::Instrument; + use super::super::harness::TenantHarness; + use super::TenantsMap; use crate::tenant::mgr::TenantSlot; - use super::{super::harness::TenantHarness, TenantsMap}; - #[tokio::test(start_paused = true)] async fn shutdown_awaits_in_progress_tenant() { // Test that if an InProgress tenant is in the map during shutdown, the shutdown will gracefully diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index e01da48052..4ba5844fea 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -179,78 +179,64 @@ pub mod index; pub mod manifest; pub(crate) mod upload; -use anyhow::Context; -use camino::Utf8Path; -use chrono::{NaiveDateTime, Utc}; - -pub(crate) use download::download_initdb_tar_zst; -use index::GcCompactionState; -use pageserver_api::models::TimelineArchivalState; -use pageserver_api::shard::{ShardIndex, TenantShardId}; -use regex::Regex; -use scopeguard::ScopeGuard; -use 
tokio_util::sync::CancellationToken; -use utils::backoff::{ - self, exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, -}; -use utils::pausable_failpoint; -use utils::shard::ShardNumber; - use std::collections::{HashMap, HashSet, VecDeque}; +use std::ops::DerefMut; use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Mutex, OnceLock}; use std::time::Duration; +use anyhow::Context; +use camino::Utf8Path; +use chrono::{NaiveDateTime, Utc}; +pub(crate) use download::{ + download_index_part, download_initdb_tar_zst, download_tenant_manifest, is_temp_download_file, + list_remote_tenant_shards, list_remote_timelines, +}; +use index::GcCompactionState; +pub(crate) use index::LayerFileMetadata; +use pageserver_api::models::TimelineArchivalState; +use pageserver_api::shard::{ShardIndex, TenantShardId}; +use regex::Regex; use remote_storage::{ DownloadError, GenericRemoteStorage, ListingMode, RemotePath, TimeoutOrCancel, }; -use std::ops::DerefMut; -use tracing::{debug, error, info, instrument, warn}; -use tracing::{info_span, Instrument}; -use utils::lsn::Lsn; - -use crate::context::RequestContext; -use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; -use crate::metrics::{ - MeasureRemoteOp, RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, - RemoteTimelineClientMetricsCallTrackSize, REMOTE_ONDEMAND_DOWNLOADED_BYTES, - REMOTE_ONDEMAND_DOWNLOADED_LAYERS, +use scopeguard::ScopeGuard; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; +pub(crate) use upload::upload_initdb_dir; +use utils::backoff::{ + self, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff, }; -use crate::task_mgr::shutdown_token; -use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::remote_timeline_client::download::download_retry; -use crate::tenant::storage_layer::AsLayerDesc; -use 
crate::tenant::upload_queue::{Delete, OpType, UploadQueueStoppedDeletable}; -use crate::tenant::TIMELINES_SEGMENT_NAME; -use crate::{ - config::PageServerConf, - task_mgr, - task_mgr::TaskKind, - task_mgr::BACKGROUND_RUNTIME, - tenant::metadata::TimelineMetadata, - tenant::upload_queue::{ - UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, UploadTask, - }, - TENANT_HEATMAP_BASENAME, -}; - use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::pausable_failpoint; +use utils::shard::ShardNumber; use self::index::IndexPart; - use super::config::AttachedLocationConfig; use super::metadata::MetadataUpdate; use super::storage_layer::{Layer, LayerName, ResidentLayer}; use super::timeline::import_pgdata; use super::upload_queue::{NotInitialized, SetDeletedFlagProgress}; use super::{DeleteTimelineError, Generation}; - -pub(crate) use download::{ - download_index_part, download_tenant_manifest, is_temp_download_file, - list_remote_tenant_shards, list_remote_timelines, +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; +use crate::metrics::{ + MeasureRemoteOp, REMOTE_ONDEMAND_DOWNLOADED_BYTES, REMOTE_ONDEMAND_DOWNLOADED_LAYERS, + RemoteOpFileKind, RemoteOpKind, RemoteTimelineClientMetrics, + RemoteTimelineClientMetricsCallTrackSize, }; -pub(crate) use index::LayerFileMetadata; -pub(crate) use upload::upload_initdb_dir; +use crate::task_mgr::{BACKGROUND_RUNTIME, TaskKind, shutdown_token}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::download::download_retry; +use crate::tenant::storage_layer::AsLayerDesc; +use crate::tenant::upload_queue::{ + Delete, OpType, UploadOp, UploadQueue, UploadQueueInitialized, UploadQueueStopped, + UploadQueueStoppedDeletable, UploadTask, +}; +use crate::tenant::{TIMELINES_SEGMENT_NAME, debug_assert_current_span_has_tenant_and_timeline_id}; +use crate::{TENANT_HEATMAP_BASENAME, 
task_mgr}; // Occasional network issues and such can cause remote operations to fail, and // that's expected. If a download fails, we log it at info-level, and retry. @@ -1091,7 +1077,11 @@ impl RemoteTimelineClient { if !wanted(x) && wanted(y) { // this could be avoided by having external in-memory synchronization, like // timeline detach ancestor - warn!(?reason, op="insert", "unexpected: two racing processes to enable and disable a gc blocking reason"); + warn!( + ?reason, + op = "insert", + "unexpected: two racing processes to enable and disable a gc blocking reason" + ); } // at this point, the metadata must always show that there is a parent @@ -1145,7 +1135,11 @@ impl RemoteTimelineClient { (x, y) if wanted(x) && !wanted(y) => Some(self.schedule_barrier0(upload_queue)), (x, y) => { if !wanted(x) && wanted(y) { - warn!(?reason, op="remove", "unexpected: two racing processes to enable and disable a gc blocking reason (remove)"); + warn!( + ?reason, + op = "remove", + "unexpected: two racing processes to enable and disable a gc blocking reason (remove)" + ); } upload_queue.dirty.gc_blocking = @@ -1287,12 +1281,14 @@ impl RemoteTimelineClient { #[cfg(feature = "testing")] for (name, metadata) in &with_metadata { - let gen = metadata.generation; - if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen) { - if unexpected == gen { + let gen_ = metadata.generation; + if let Some(unexpected) = upload_queue.dangling_files.insert(name.to_owned(), gen_) { + if unexpected == gen_ { tracing::error!("{name} was unlinked twice with same generation"); } else { - tracing::error!("{name} was unlinked twice with different generations {gen:?} and {unexpected:?}"); + tracing::error!( + "{name} was unlinked twice with different generations {gen_:?} and {unexpected:?}" + ); } } } @@ -1354,11 +1350,11 @@ impl RemoteTimelineClient { #[cfg(feature = "testing")] for (name, meta) in &with_metadata { - let gen = meta.generation; + let gen_ = meta.generation; 
match upload_queue.dangling_files.remove(name) { - Some(same) if same == gen => { /* expected */ } + Some(same) if same == gen_ => { /* expected */ } Some(other) => { - tracing::error!("{name} was unlinked with {other:?} but deleted with {gen:?}"); + tracing::error!("{name} was unlinked with {other:?} but deleted with {gen_:?}"); } None => { tracing::error!("{name} was unlinked but was not dangling"); @@ -1455,7 +1451,9 @@ impl RemoteTimelineClient { // proper stop is yet to be called. On cancel the original or some later task must call // `stop` or `shutdown`. let sg = scopeguard::guard((), |_| { - tracing::error!("RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error") + tracing::error!( + "RemoteTimelineClient::shutdown was cancelled; this should not happen, do not make this into an allowed_error" + ) }); let fut = { @@ -1471,7 +1469,7 @@ impl RemoteTimelineClient { scopeguard::ScopeGuard::into_inner(sg); return; } - UploadQueue::Initialized(ref mut init) => init, + UploadQueue::Initialized(init) => init, }; // if the queue is already stuck due to a shutdown operation which was cancelled, then @@ -1831,7 +1829,9 @@ impl RemoteTimelineClient { .map(|n| n.starts_with(IndexPart::FILE_NAME)) .unwrap_or(false) }) - .filter_map(|o| parse_remote_index_path(o.key.clone()).map(|gen| (o.key.clone(), gen))) + .filter_map(|o| { + parse_remote_index_path(o.key.clone()).map(|gen_| (o.key.clone(), gen_)) + }) .max_by_key(|i| i.1) .map(|i| i.0.clone()) .unwrap_or( @@ -2023,7 +2023,7 @@ impl RemoteTimelineClient { } let upload_result: anyhow::Result<()> = match &task.op { - UploadOp::UploadLayer(ref layer, ref layer_metadata, mode) => { + UploadOp::UploadLayer(layer, layer_metadata, mode) => { // TODO: check if this mechanism can be removed now that can_bypass() performs // conflict checks during scheduling. 
if let Some(OpType::FlushDeletion) = mode { @@ -2113,7 +2113,7 @@ impl RemoteTimelineClient { ) .await } - UploadOp::UploadMetadata { ref uploaded } => { + UploadOp::UploadMetadata { uploaded } => { let res = upload::upload_index_part( &self.storage_impl, &self.tenant_shard_id, @@ -2229,11 +2229,11 @@ impl RemoteTimelineClient { let lsn_update = { let mut upload_queue_guard = self.upload_queue.lock().unwrap(); let upload_queue = match upload_queue_guard.deref_mut() { - UploadQueue::Uninitialized => panic!("callers are responsible for ensuring this is only called on an initialized queue"), - UploadQueue::Stopped(_stopped) => { - None - }, - UploadQueue::Initialized(qi) => { Some(qi) } + UploadQueue::Uninitialized => panic!( + "callers are responsible for ensuring this is only called on an initialized queue" + ), + UploadQueue::Stopped(_stopped) => None, + UploadQueue::Initialized(qi) => Some(qi), }; let upload_queue = match upload_queue { @@ -2255,7 +2255,11 @@ impl RemoteTimelineClient { let is_later = last_updater.is_some_and(|task_id| task_id < task.task_id); let monotone = is_later || last_updater.is_none(); - assert!(monotone, "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", task.task_id); + assert!( + monotone, + "no two index uploads should be completing at the same time, prev={last_updater:?}, task.task_id={}", + task.task_id + ); // not taking ownership is wasteful upload_queue.clean.0.clone_from(uploaded); @@ -2654,20 +2658,16 @@ pub fn parse_remote_tenant_manifest_path(path: RemotePath) -> Option #[cfg(test)] mod tests { - use super::*; - use crate::{ - context::RequestContext, - tenant::{ - config::AttachmentMode, - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::layer::local_layer_path, - Tenant, Timeline, - }, - DEFAULT_PG_VERSION, - }; - use std::collections::HashSet; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::context::RequestContext; + use 
crate::tenant::config::AttachmentMode; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::layer::local_layer_path; + use crate::tenant::{Tenant, Timeline}; + pub(super) fn dummy_contents(name: &str) -> Vec { format!("contents for {name}").into() } diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index b4d45dca75..92be2145ce 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -8,41 +8,39 @@ use std::future::Future; use std::str::FromStr; use std::time::SystemTime; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::shard::TenantShardId; +use remote_storage::{ + DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, +}; use tokio::fs::{self, File, OpenOptions}; use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; use tracing::warn; -use utils::backoff; +use utils::crashsafe::path_with_suffix_extension; +use utils::id::{TenantId, TimelineId}; +use utils::{backoff, pausable_failpoint}; +use super::index::{IndexPart, LayerFileMetadata}; +use super::manifest::TenantManifest; +use super::{ + FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, INITDB_PATH, parse_remote_index_path, + parse_remote_tenant_manifest_path, remote_index_path, remote_initdb_archive_path, + remote_initdb_preserved_archive_path, remote_tenant_manifest_path, + remote_tenant_manifest_prefix, remote_tenant_path, +}; +use crate::TEMP_FILE_SUFFIX; use crate::config::PageServerConf; use crate::context::RequestContext; use crate::span::{ debug_assert_current_span_has_tenant_and_timeline_id, debug_assert_current_span_has_tenant_id, }; +use crate::tenant::Generation; use crate::tenant::remote_timeline_client::{remote_layer_path, 
remote_timelines_path}; use crate::tenant::storage_layer::LayerName; -use crate::tenant::Generation; -use crate::virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}; -use crate::TEMP_FILE_SUFFIX; -use remote_storage::{ - DownloadError, DownloadKind, DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, -}; -use utils::crashsafe::path_with_suffix_extension; -use utils::id::{TenantId, TimelineId}; -use utils::pausable_failpoint; - -use super::index::{IndexPart, LayerFileMetadata}; -use super::manifest::TenantManifest; -use super::{ - parse_remote_index_path, parse_remote_tenant_manifest_path, remote_index_path, - remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path, - remote_tenant_manifest_prefix, remote_tenant_path, FAILED_DOWNLOAD_WARN_THRESHOLD, - FAILED_REMOTE_OP_RETRIES, INITDB_PATH, -}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error}; /// /// If 'metadata' is given, we will validate that the downloaded file's size matches that @@ -207,9 +205,9 @@ async fn download_object( } #[cfg(target_os = "linux")] crate::virtual_file::io_engine::IoEngine::TokioEpollUring => { - use crate::virtual_file::owned_buffers_io; - use crate::virtual_file::IoBufferMut; use std::sync::Arc; + + use crate::virtual_file::{IoBufferMut, owned_buffers_io}; async { let destination_file = Arc::new( VirtualFile::create(dst_path, ctx) diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index 727b25fbf4..ceaed58bbd 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -7,16 +7,16 @@ use std::collections::HashMap; use chrono::NaiveDateTime; use pageserver_api::models::AuxFilePolicy; +use pageserver_api::shard::ShardIndex; use serde::{Deserialize, Serialize}; +use utils::id::TimelineId; +use utils::lsn::Lsn; use super::is_same_remote_layer_path; +use 
crate::tenant::Generation; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::storage_layer::LayerName; use crate::tenant::timeline::import_pgdata; -use crate::tenant::Generation; -use pageserver_api::shard::ShardIndex; -use utils::id::TimelineId; -use utils::lsn::Lsn; /// In-memory representation of an `index_part.json` file /// @@ -435,10 +435,12 @@ impl GcBlocking { #[cfg(test)] mod tests { - use super::*; use std::str::FromStr; + use utils::id::TimelineId; + use super::*; + #[test] fn v1_indexpart_is_parsed() { let example = r#"{ diff --git a/pageserver/src/tenant/remote_timeline_client/manifest.rs b/pageserver/src/tenant/remote_timeline_client/manifest.rs index 2029847a12..543ccc219d 100644 --- a/pageserver/src/tenant/remote_timeline_client/manifest.rs +++ b/pageserver/src/tenant/remote_timeline_client/manifest.rs @@ -1,6 +1,7 @@ use chrono::NaiveDateTime; use serde::{Deserialize, Serialize}; -use utils::{id::TimelineId, lsn::Lsn}; +use utils::id::TimelineId; +use utils::lsn::Lsn; /// Tenant-shard scoped manifest #[derive(Clone, Serialize, Deserialize, PartialEq, Eq)] diff --git a/pageserver/src/tenant/remote_timeline_client/upload.rs b/pageserver/src/tenant/remote_timeline_client/upload.rs index af4dbbbfb6..7d9f47665a 100644 --- a/pageserver/src/tenant/remote_timeline_client/upload.rs +++ b/pageserver/src/tenant/remote_timeline_client/upload.rs @@ -1,28 +1,28 @@ //! 
Helper functions to upload files to remote storage with a RemoteStorage -use anyhow::{bail, Context}; +use std::io::{ErrorKind, SeekFrom}; +use std::time::SystemTime; + +use anyhow::{Context, bail}; use bytes::Bytes; use camino::Utf8Path; use fail::fail_point; use pageserver_api::shard::TenantShardId; -use std::io::{ErrorKind, SeekFrom}; -use std::time::SystemTime; +use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError}; use tokio::fs::{self, File}; use tokio::io::AsyncSeekExt; use tokio_util::sync::CancellationToken; +use tracing::info; +use utils::id::{TenantId, TimelineId}; use utils::{backoff, pausable_failpoint}; +use super::Generation; use super::index::IndexPart; use super::manifest::TenantManifest; -use super::Generation; use crate::tenant::remote_timeline_client::{ remote_index_path, remote_initdb_archive_path, remote_initdb_preserved_archive_path, remote_tenant_manifest_path, }; -use remote_storage::{GenericRemoteStorage, RemotePath, TimeTravelError}; -use utils::id::{TenantId, TimelineId}; - -use tracing::info; /// Serializes and uploads the given index part data to the remote storage. 
pub(crate) async fn upload_index_part( @@ -134,7 +134,9 @@ pub(super) async fn upload_timeline_layer<'a>( .len(); if metadata_size != fs_size { - bail!("File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}"); + bail!( + "File {local_path:?} has its current FS size {fs_size} diferent from initially determined {metadata_size}" + ); } let fs_size = usize::try_from(fs_size) diff --git a/pageserver/src/tenant/secondary.rs b/pageserver/src/tenant/secondary.rs index 4bc208331b..8f8622c796 100644 --- a/pageserver/src/tenant/secondary.rs +++ b/pageserver/src/tenant/secondary.rs @@ -3,40 +3,31 @@ pub mod heatmap; mod heatmap_uploader; mod scheduler; -use std::{sync::Arc, time::SystemTime}; +use std::sync::Arc; +use std::time::SystemTime; -use crate::{ - context::RequestContext, - disk_usage_eviction_task::DiskUsageEvictionInfo, - metrics::SECONDARY_HEATMAP_TOTAL_SIZE, - task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, -}; - -use self::{ - downloader::{downloader_task, SecondaryDetail}, - heatmap_uploader::heatmap_uploader_task, -}; - -use super::{ - config::{SecondaryLocationConfig, TenantConfOpt}, - mgr::TenantManager, - span::debug_assert_current_span_has_tenant_id, - storage_layer::LayerName, - GetTenantError, -}; - -use crate::metrics::SECONDARY_RESIDENT_PHYSICAL_SIZE; use metrics::UIntGauge; -use pageserver_api::{ - models, - shard::{ShardIdentity, TenantShardId}, -}; +use pageserver_api::models; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; use remote_storage::GenericRemoteStorage; - use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::instrument; -use utils::{completion::Barrier, id::TimelineId, sync::gate::Gate}; +use utils::completion::Barrier; +use utils::id::TimelineId; +use utils::sync::gate::Gate; + +use self::downloader::{SecondaryDetail, downloader_task}; +use self::heatmap_uploader::heatmap_uploader_task; +use super::GetTenantError; +use 
super::config::{SecondaryLocationConfig, TenantConfOpt}; +use super::mgr::TenantManager; +use super::span::debug_assert_current_span_has_tenant_id; +use super::storage_layer::LayerName; +use crate::context::RequestContext; +use crate::disk_usage_eviction_task::DiskUsageEvictionInfo; +use crate::metrics::{SECONDARY_HEATMAP_TOTAL_SIZE, SECONDARY_RESIDENT_PHYSICAL_SIZE}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; enum DownloadCommand { Download(TenantShardId), diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 2e8c3946bd..a13b9323ac 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -1,47 +1,8 @@ -use std::{ - collections::{HashMap, HashSet}, - pin::Pin, - str::FromStr, - sync::Arc, - time::{Duration, Instant, SystemTime}, -}; - -use crate::{ - config::PageServerConf, - context::RequestContext, - disk_usage_eviction_task::{ - finite_f32, DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, - }, - metrics::SECONDARY_MODE, - tenant::{ - config::SecondaryLocationConfig, - debug_assert_current_span_has_tenant_and_timeline_id, - ephemeral_file::is_ephemeral_file, - remote_timeline_client::{ - index::LayerFileMetadata, is_temp_download_file, FAILED_DOWNLOAD_WARN_THRESHOLD, - FAILED_REMOTE_OP_RETRIES, - }, - span::debug_assert_current_span_has_tenant_id, - storage_layer::{layer::local_layer_path, LayerName, LayerVisibilityHint}, - tasks::{warn_when_period_overrun, BackgroundLoopKind}, - }, - virtual_file::{on_fatal_io_error, MaybeFatalIo, VirtualFile}, - TEMP_FILE_SUFFIX, -}; - -use super::{ - heatmap::HeatMapLayer, - scheduler::{ - self, period_jitter, period_warmup, Completion, JobGenerator, SchedulingResult, - TenantBackgroundJobs, - }, - GetTenantError, SecondaryTenant, SecondaryTenantError, -}; - -use crate::tenant::{ - mgr::TenantManager, - remote_timeline_client::{download::download_layer_file, 
remote_heatmap_path}, -}; +use std::collections::{HashMap, HashSet}; +use std::pin::Pin; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant, SystemTime}; use camino::Utf8PathBuf; use chrono::format::{DelayedFormat, StrftimeItems}; @@ -50,18 +11,43 @@ use metrics::UIntGauge; use pageserver_api::models::SecondaryProgress; use pageserver_api::shard::TenantShardId; use remote_storage::{DownloadError, DownloadKind, DownloadOpts, Etag, GenericRemoteStorage}; - use tokio_util::sync::CancellationToken; -use tracing::{info_span, instrument, warn, Instrument}; -use utils::{ - backoff, completion::Barrier, crashsafe::path_with_suffix_extension, failpoint_support, fs_ext, - id::TimelineId, pausable_failpoint, serde_system_time, -}; +use tracing::{Instrument, info_span, instrument, warn}; +use utils::completion::Barrier; +use utils::crashsafe::path_with_suffix_extension; +use utils::id::TimelineId; +use utils::{backoff, failpoint_support, fs_ext, pausable_failpoint, serde_system_time}; -use super::{ - heatmap::{HeatMapTenant, HeatMapTimeline}, - CommandRequest, DownloadCommand, +use super::heatmap::{HeatMapLayer, HeatMapTenant, HeatMapTimeline}; +use super::scheduler::{ + self, Completion, JobGenerator, SchedulingResult, TenantBackgroundJobs, period_jitter, + period_warmup, }; +use super::{ + CommandRequest, DownloadCommand, GetTenantError, SecondaryTenant, SecondaryTenantError, +}; +use crate::TEMP_FILE_SUFFIX; +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::disk_usage_eviction_task::{ + DiskUsageEvictionInfo, EvictionCandidate, EvictionLayer, EvictionSecondaryLayer, finite_f32, +}; +use crate::metrics::SECONDARY_MODE; +use crate::tenant::config::SecondaryLocationConfig; +use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::ephemeral_file::is_ephemeral_file; +use crate::tenant::mgr::TenantManager; +use crate::tenant::remote_timeline_client::download::download_layer_file; 
+use crate::tenant::remote_timeline_client::index::LayerFileMetadata; +use crate::tenant::remote_timeline_client::{ + FAILED_DOWNLOAD_WARN_THRESHOLD, FAILED_REMOTE_OP_RETRIES, is_temp_download_file, + remote_heatmap_path, +}; +use crate::tenant::span::debug_assert_current_span_has_tenant_id; +use crate::tenant::storage_layer::layer::local_layer_path; +use crate::tenant::storage_layer::{LayerName, LayerVisibilityHint}; +use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile, on_fatal_io_error}; /// For each tenant, default period for how long must have passed since the last download_tenant call before /// calling it again. This default is replaced with the value of [`HeatMapTenant::upload_period_ms`] after first diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs index 0fa10ca294..4a938e9095 100644 --- a/pageserver/src/tenant/secondary/heatmap.rs +++ b/pageserver/src/tenant/secondary/heatmap.rs @@ -1,11 +1,13 @@ -use std::{collections::HashMap, time::SystemTime}; - -use crate::tenant::{remote_timeline_client::index::LayerFileMetadata, storage_layer::LayerName}; +use std::collections::HashMap; +use std::time::SystemTime; use serde::{Deserialize, Serialize}; -use serde_with::{serde_as, DisplayFromStr, TimestampSeconds}; +use serde_with::{DisplayFromStr, TimestampSeconds, serde_as}; +use utils::generation::Generation; +use utils::id::TimelineId; -use utils::{generation::Generation, id::TimelineId}; +use crate::tenant::remote_timeline_client::index::LayerFileMetadata; +use crate::tenant::storage_layer::LayerName; #[derive(Serialize, Deserialize)] pub(crate) struct HeatMapTenant { diff --git a/pageserver/src/tenant/secondary/heatmap_uploader.rs b/pageserver/src/tenant/secondary/heatmap_uploader.rs index d72c337369..3375714a66 100644 --- a/pageserver/src/tenant/secondary/heatmap_uploader.rs +++ b/pageserver/src/tenant/secondary/heatmap_uploader.rs @@ 
-1,42 +1,33 @@ -use std::{ - collections::HashMap, - pin::Pin, - sync::{Arc, Weak}, - time::{Duration, Instant}, -}; - -use crate::{ - metrics::SECONDARY_MODE, - tenant::{ - config::AttachmentMode, - mgr::{GetTenantError, TenantManager}, - remote_timeline_client::remote_heatmap_path, - span::debug_assert_current_span_has_tenant_id, - tasks::{warn_when_period_overrun, BackgroundLoopKind}, - Tenant, - }, - virtual_file::VirtualFile, - TEMP_FILE_SUFFIX, -}; +use std::collections::HashMap; +use std::pin::Pin; +use std::sync::{Arc, Weak}; +use std::time::{Duration, Instant}; use futures::Future; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, TimeoutOrCancel}; - -use super::{ - heatmap::HeatMapTenant, - scheduler::{ - self, period_jitter, period_warmup, JobGenerator, RunningJob, SchedulingResult, - TenantBackgroundJobs, - }, - CommandRequest, SecondaryTenantError, UploadCommand, -}; use tokio_util::sync::CancellationToken; -use tracing::{info_span, instrument, Instrument}; -use utils::{ - backoff, completion::Barrier, crashsafe::path_with_suffix_extension, - yielding_loop::yielding_loop, +use tracing::{Instrument, info_span, instrument}; +use utils::backoff; +use utils::completion::Barrier; +use utils::crashsafe::path_with_suffix_extension; +use utils::yielding_loop::yielding_loop; + +use super::heatmap::HeatMapTenant; +use super::scheduler::{ + self, JobGenerator, RunningJob, SchedulingResult, TenantBackgroundJobs, period_jitter, + period_warmup, }; +use super::{CommandRequest, SecondaryTenantError, UploadCommand}; +use crate::TEMP_FILE_SUFFIX; +use crate::metrics::SECONDARY_MODE; +use crate::tenant::Tenant; +use crate::tenant::config::AttachmentMode; +use crate::tenant::mgr::{GetTenantError, TenantManager}; +use crate::tenant::remote_timeline_client::remote_heatmap_path; +use crate::tenant::span::debug_assert_current_span_has_tenant_id; +use crate::tenant::tasks::{BackgroundLoopKind, warn_when_period_overrun}; +use 
crate::virtual_file::VirtualFile; pub(super) async fn heatmap_uploader_task( tenant_manager: Arc, diff --git a/pageserver/src/tenant/secondary/scheduler.rs b/pageserver/src/tenant/secondary/scheduler.rs index e963c722b9..f948f9114f 100644 --- a/pageserver/src/tenant/secondary/scheduler.rs +++ b/pageserver/src/tenant/secondary/scheduler.rs @@ -1,16 +1,15 @@ -use futures::Future; -use rand::Rng; -use std::{ - collections::HashMap, - marker::PhantomData, - pin::Pin, - time::{Duration, Instant}, -}; +use std::collections::HashMap; +use std::marker::PhantomData; +use std::pin::Pin; +use std::time::{Duration, Instant}; +use futures::Future; use pageserver_api::shard::TenantShardId; +use rand::Rng; use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; -use utils::{completion::Barrier, yielding_loop::yielding_loop}; +use utils::completion::Barrier; +use utils::yielding_loop::yielding_loop; use super::{CommandRequest, CommandResponse, SecondaryTenantError}; diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index 1e84a9d9dc..ed6b351c75 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -4,21 +4,18 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; use tenant_size_model::svg::SvgBranchKind; -use tokio::sync::oneshot::error::RecvError; +use tenant_size_model::{Segment, StorageModel}; use tokio::sync::Semaphore; +use tokio::sync::oneshot::error::RecvError; use tokio_util::sync::CancellationToken; - -use crate::context::RequestContext; -use crate::pgdatadir_mapping::CalculateLogicalSizeError; - -use super::{GcError, LogicalSizeCalculationCause, Tenant}; -use crate::tenant::{MaybeOffloaded, Timeline}; +use tracing::*; use utils::id::TimelineId; use utils::lsn::Lsn; -use tracing::*; - -use tenant_size_model::{Segment, StorageModel}; +use super::{GcError, LogicalSizeCalculationCause, Tenant}; +use crate::context::RequestContext; +use crate::pgdatadir_mapping::CalculateLogicalSizeError; +use 
crate::tenant::{MaybeOffloaded, Timeline}; /// Inputs to the actual tenant sizing model /// @@ -498,7 +495,9 @@ async fn fill_logical_sizes( } Err(join_error) => { // cannot really do anything, as this panic is likely a bug - error!("task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}"); + error!( + "task that calls spawn_ondemand_logical_size_calculation panicked: {join_error:#}" + ); have_any_error = Some(CalculateSyntheticSizeError::Fatal( anyhow::anyhow!(join_error) diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index f9f843ef6b..7f313f46a2 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -10,42 +10,39 @@ mod layer_desc; mod layer_name; pub mod merge_iterator; -use crate::config::PageServerConf; -use crate::context::{AccessStatsBehavior, RequestContext}; -use bytes::Bytes; -use futures::stream::FuturesUnordered; -use futures::StreamExt; -use pageserver_api::key::Key; -use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; -use pageserver_api::record::NeonWalRecord; -use pageserver_api::value::Value; use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::{BinaryHeap, HashMap}; use std::future::Future; use std::ops::Range; use std::pin::Pin; -use std::sync::atomic::AtomicUsize; use std::sync::Arc; +use std::sync::atomic::AtomicUsize; use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use tracing::{trace, Instrument}; -use utils::sync::gate::GateGuard; - -use utils::lsn::Lsn; pub use batch_split_writer::{BatchLayerWriter, SplitDeltaLayerWriter, SplitImageLayerWriter}; +use bytes::Bytes; pub use delta_layer::{DeltaLayer, DeltaLayerWriter, ValueRef}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; pub use image_layer::{ImageLayer, ImageLayerWriter}; pub use inmemory_layer::InMemoryLayer; +pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; pub use 
layer_desc::{PersistentLayerDesc, PersistentLayerKey}; pub use layer_name::{DeltaLayerName, ImageLayerName, LayerName}; - -pub(crate) use layer::{EvictionError, Layer, ResidentLayer}; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; +use pageserver_api::record::NeonWalRecord; +use pageserver_api::value::Value; +use tracing::{Instrument, trace}; +use utils::lsn::Lsn; +use utils::sync::gate::GateGuard; use self::inmemory_layer::InMemoryLayerFileId; - -use super::timeline::{GetVectoredError, ReadPath}; use super::PageReconstructError; +use super::timeline::{GetVectoredError, ReadPath}; +use crate::config::PageServerConf; +use crate::context::{AccessStatsBehavior, RequestContext}; pub fn range_overlaps(a: &Range, b: &Range) -> bool where @@ -510,6 +507,7 @@ impl IoConcurrency { #[cfg(test)] pub(crate) fn spawn_for_test() -> impl std::ops::DerefMut { use std::ops::{Deref, DerefMut}; + use tracing::info; use utils::sync::gate::Gate; diff --git a/pageserver/src/tenant/storage_layer/batch_split_writer.rs b/pageserver/src/tenant/storage_layer/batch_split_writer.rs index 7da51c27df..fd50e4805d 100644 --- a/pageserver/src/tenant/storage_layer/batch_split_writer.rs +++ b/pageserver/src/tenant/storage_layer/batch_split_writer.rs @@ -1,17 +1,22 @@ -use std::{future::Future, ops::Range, sync::Arc}; +use std::future::Future; +use std::ops::Range; +use std::sync::Arc; use bytes::Bytes; -use pageserver_api::key::{Key, KEY_SIZE}; -use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId}; - -use crate::tenant::storage_layer::Layer; -use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline}; +use pageserver_api::key::{KEY_SIZE, Key}; use pageserver_api::value::Value; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::shard::TenantShardId; use super::layer::S3_UPLOAD_LIMIT; use super::{ DeltaLayerWriter, ImageLayerWriter, PersistentLayerDesc, PersistentLayerKey, ResidentLayer, }; +use 
crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::tenant::Timeline; +use crate::tenant::storage_layer::Layer; pub(crate) enum BatchWriterResult { Produced(ResidentLayer), @@ -423,15 +428,10 @@ mod tests { use itertools::Itertools; use rand::{RngCore, SeedableRng}; - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::AsLayerDesc, - }, - DEFAULT_PG_VERSION, - }; - use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::AsLayerDesc; fn get_key(id: u32) -> Key { let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap(); diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 7ba0e3679f..d9afdc2405 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -27,6 +27,38 @@ //! "values" part. The actual page images and WAL records are stored in the //! "values" part. //! 
+use std::collections::{HashMap, VecDeque}; +use std::fs::File; +use std::io::SeekFrom; +use std::ops::Range; +use std::os::unix::fs::FileExt; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{Context, Result, bail, ensure}; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use itertools::Itertools; +use pageserver_api::config::MaxVectoredReadBytes; +use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::models::ImageCompressionAlgorithm; +use pageserver_api::shard::TenantShardId; +use pageserver_api::value::Value; +use rand::Rng; +use rand::distributions::Alphanumeric; +use serde::{Deserialize, Serialize}; +use tokio::sync::OnceCell; +use tokio_epoll_uring::IoBuf; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; + +use super::{ + AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, + ValuesReconstructState, +}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; @@ -42,43 +74,8 @@ use crate::tenant::vectored_blob_io::{ VectoredReadPlanner, }; use crate::virtual_file::owned_buffers_io::io_buf_ext::{FullSlice, IoBufExt}; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; -use crate::TEMP_FILE_SUFFIX; -use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION}; -use anyhow::{bail, ensure, Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use futures::StreamExt; -use itertools::Itertools; -use pageserver_api::config::MaxVectoredReadBytes; -use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE}; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::models::ImageCompressionAlgorithm; -use pageserver_api::shard::TenantShardId; -use pageserver_api::value::Value; -use rand::{distributions::Alphanumeric, Rng}; -use serde::{Deserialize, 
Serialize}; -use std::collections::{HashMap, VecDeque}; -use std::fs::File; -use std::io::SeekFrom; -use std::ops::Range; -use std::os::unix::fs::FileExt; -use std::str::FromStr; -use std::sync::Arc; -use tokio::sync::OnceCell; -use tokio_epoll_uring::IoBuf; -use tracing::*; - -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, -}; - -use super::{ - AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, - ValuesReconstructState, -}; +use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile}; +use crate::{DELTA_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; /// /// Header stored in the beginning of the file @@ -1130,10 +1127,11 @@ impl DeltaLayerInner { until: Lsn, ctx: &RequestContext, ) -> anyhow::Result { + use futures::stream::TryStreamExt; + use crate::tenant::vectored_blob_io::{ BlobMeta, ChunkedVectoredReadBuilder, VectoredReadExtended, }; - use futures::stream::TryStreamExt; #[derive(Debug)] enum Item { @@ -1599,23 +1597,21 @@ impl DeltaLayerIterator<'_> { pub(crate) mod test { use std::collections::BTreeMap; + use bytes::Bytes; use itertools::MinMaxResult; - use rand::prelude::{SeedableRng, SliceRandom, StdRng}; + use pageserver_api::value::Value; use rand::RngCore; + use rand::prelude::{SeedableRng, SliceRandom, StdRng}; use super::*; - use crate::tenant::harness::TIMELINE_ID; + use crate::DEFAULT_PG_VERSION; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::tenant::disk_btree::tests::TestDisk; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; use crate::tenant::storage_layer::{Layer, ResidentLayer}; use crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; use crate::tenant::{Tenant, Timeline}; - use crate::{ - context::DownloadBehavior, - task_mgr::TaskKind, - tenant::{disk_btree::tests::TestDisk, harness::TenantHarness}, - DEFAULT_PG_VERSION, - }; - use bytes::Bytes; - use pageserver_api::value::Value; /// Construct an 
index for a fictional delta layer and and then /// traverse in order to plan vectored reads for a query. Finally, diff --git a/pageserver/src/tenant/storage_layer/filter_iterator.rs b/pageserver/src/tenant/storage_layer/filter_iterator.rs index 8660be1fcc..8d172a1c19 100644 --- a/pageserver/src/tenant/storage_layer/filter_iterator.rs +++ b/pageserver/src/tenant/storage_layer/filter_iterator.rs @@ -1,18 +1,14 @@ -use std::{ops::Range, sync::Arc}; +use std::ops::Range; +use std::sync::Arc; use anyhow::bail; -use pageserver_api::{ - key::Key, - keyspace::{KeySpace, SparseKeySpace}, -}; +use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpace, SparseKeySpace}; +use pageserver_api::value::Value; use utils::lsn::Lsn; -use pageserver_api::value::Value; - -use super::{ - merge_iterator::{MergeIterator, MergeIteratorItem}, - PersistentLayerKey, -}; +use super::PersistentLayerKey; +use super::merge_iterator::{MergeIterator, MergeIteratorItem}; /// A filter iterator over merge iterators (and can be easily extended to other types of iterators). /// @@ -98,19 +94,14 @@ impl<'a> FilterIterator<'a> { #[cfg(test)] mod tests { - use super::*; - use itertools::Itertools; use pageserver_api::key::Key; use utils::lsn::Lsn; - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::delta_layer::test::produce_delta_layer, - }, - DEFAULT_PG_VERSION, - }; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::delta_layer::test::produce_delta_layer; async fn assert_filter_iter_equal( filter_iter: &mut FilterIterator<'_>, diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index dc611bd6e1..0db9e8c845 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -25,6 +25,39 @@ //! layer, and offsets to the other parts. 
The "index" is a B-tree, //! mapping from Key to an offset in the "values" part. The //! actual page images are stored in the "values" part. +use std::collections::{HashMap, VecDeque}; +use std::fs::File; +use std::io::SeekFrom; +use std::ops::Range; +use std::os::unix::prelude::FileExt; +use std::str::FromStr; +use std::sync::Arc; + +use anyhow::{Context, Result, bail, ensure}; +use bytes::Bytes; +use camino::{Utf8Path, Utf8PathBuf}; +use hex; +use itertools::Itertools; +use pageserver_api::config::MaxVectoredReadBytes; +use pageserver_api::key::{DBDIR_KEY, KEY_SIZE, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; +use pageserver_api::value::Value; +use rand::Rng; +use rand::distributions::Alphanumeric; +use serde::{Deserialize, Serialize}; +use tokio::sync::OnceCell; +use tokio_stream::StreamExt; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; + +use super::layer_name::ImageLayerName; +use super::{ + AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, + ValuesReconstructState, +}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; use crate::page_cache::{self, FileId, PAGE_SZ}; @@ -39,43 +72,8 @@ use crate::tenant::vectored_blob_io::{ VectoredReadPlanner, }; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, MaybeFatalIo, VirtualFile}; +use crate::virtual_file::{self, IoBufferMut, MaybeFatalIo, VirtualFile}; use crate::{IMAGE_FILE_MAGIC, STORAGE_FORMAT_VERSION, TEMP_FILE_SUFFIX}; -use anyhow::{bail, ensure, Context, Result}; -use bytes::Bytes; -use camino::{Utf8Path, Utf8PathBuf}; -use hex; -use itertools::Itertools; -use pageserver_api::config::MaxVectoredReadBytes; -use pageserver_api::key::{Key, DBDIR_KEY, KEY_SIZE}; -use pageserver_api::keyspace::KeySpace; -use 
pageserver_api::shard::{ShardIdentity, TenantShardId}; -use pageserver_api::value::Value; -use rand::{distributions::Alphanumeric, Rng}; -use serde::{Deserialize, Serialize}; -use std::collections::{HashMap, VecDeque}; -use std::fs::File; -use std::io::SeekFrom; -use std::ops::Range; -use std::os::unix::prelude::FileExt; -use std::str::FromStr; -use std::sync::Arc; -use tokio::sync::OnceCell; -use tokio_stream::StreamExt; -use tracing::*; - -use utils::{ - bin_ser::BeSer, - id::{TenantId, TimelineId}, - lsn::Lsn, -}; - -use super::layer_name::ImageLayerName; -use super::{ - AsLayerDesc, LayerName, OnDiskValue, OnDiskValueIo, PersistentLayerDesc, ResidentLayer, - ValuesReconstructState, -}; /// /// Header stored in the beginning of the file @@ -1135,34 +1133,26 @@ impl ImageLayerIterator<'_> { #[cfg(test)] mod test { - use std::{sync::Arc, time::Duration}; + use std::sync::Arc; + use std::time::Duration; use bytes::Bytes; use itertools::Itertools; - use pageserver_api::{ - key::Key, - shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}, - value::Value, - }; - use utils::{ - generation::Generation, - id::{TenantId, TimelineId}, - lsn::Lsn, - }; - - use crate::{ - context::RequestContext, - tenant::{ - config::TenantConf, - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::{Layer, ResidentLayer}, - vectored_blob_io::StreamingVectoredReadPlanner, - Tenant, Timeline, - }, - DEFAULT_PG_VERSION, - }; + use pageserver_api::key::Key; + use pageserver_api::shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize}; + use pageserver_api::value::Value; + use utils::generation::Generation; + use utils::id::{TenantId, TimelineId}; + use utils::lsn::Lsn; use super::{ImageLayerIterator, ImageLayerWriter}; + use crate::DEFAULT_PG_VERSION; + use crate::context::RequestContext; + use crate::tenant::config::TenantConf; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::{Layer, ResidentLayer}; + use 
crate::tenant::vectored_blob_io::StreamingVectoredReadPlanner; + use crate::tenant::{Tenant, Timeline}; #[tokio::test] async fn image_layer_rewrite() { @@ -1172,10 +1162,10 @@ mod test { ..TenantConf::default() }; let tenant_id = TenantId::generate(); - let mut gen = Generation::new(0xdead0001); + let mut gen_ = Generation::new(0xdead0001); let mut get_next_gen = || { - let ret = gen; - gen = gen.next(); + let ret = gen_; + gen_ = gen_.next(); ret }; // The LSN at which we will create an image layer to filter diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index 61a0fdea8c..ffdfe1dc27 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -4,38 +4,39 @@ //! held in an ephemeral file, not in memory. The metadata for each page version, i.e. //! its position in the file, is kept in memory, though. //! -use crate::assert_u64_eq_usize::{u64_to_usize, U64IsUsize, UsizeIsU64}; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap}; +use std::fmt::Write; +use std::ops::Range; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering as AtomicOrdering}; +use std::sync::{Arc, OnceLock}; +use std::time::Instant; + +use anyhow::Result; +use camino::Utf8PathBuf; +use pageserver_api::key::{CompactKey, Key}; +use pageserver_api::keyspace::KeySpace; +use pageserver_api::models::InMemoryLayerInfo; +use pageserver_api::shard::TenantShardId; +use tokio::sync::RwLock; +use tracing::*; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::vec_map::VecMap; +use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta}; + +use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState}; +use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64, u64_to_usize}; use crate::config::PageServerConf; use crate::context::{PageContentKind, RequestContext, RequestContextBuilder}; +// avoid 
binding to Write (conflicts with std::io::Write) +// while being able to use std::fmt::Write's methods +use crate::metrics::TIMELINE_EPHEMERAL_BYTES; use crate::tenant::ephemeral_file::EphemeralFile; use crate::tenant::storage_layer::{OnDiskValue, OnDiskValueIo}; use crate::tenant::timeline::GetVectoredError; use crate::virtual_file::owned_buffers_io::io_buf_ext::IoBufExt; use crate::{l0_flush, page_cache}; -use anyhow::Result; -use camino::Utf8PathBuf; -use pageserver_api::key::CompactKey; -use pageserver_api::key::Key; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::models::InMemoryLayerInfo; -use pageserver_api::shard::TenantShardId; -use std::collections::{BTreeMap, HashMap}; -use std::sync::{Arc, OnceLock}; -use std::time::Instant; -use tracing::*; -use utils::{id::TimelineId, lsn::Lsn, vec_map::VecMap}; -use wal_decoder::serialized_batch::{SerializedValueBatch, SerializedValueMeta, ValueMeta}; -// avoid binding to Write (conflicts with std::io::Write) -// while being able to use std::fmt::Write's methods -use crate::metrics::TIMELINE_EPHEMERAL_BYTES; -use std::cmp::Ordering; -use std::fmt::Write; -use std::ops::Range; -use std::sync::atomic::Ordering as AtomicOrdering; -use std::sync::atomic::{AtomicU64, AtomicUsize}; -use tokio::sync::RwLock; - -use super::{DeltaLayerWriter, PersistentLayerDesc, ValuesReconstructState}; pub(crate) mod vectored_dio_read; @@ -555,7 +556,9 @@ impl InMemoryLayer { gate: &utils::sync::gate::Gate, ctx: &RequestContext, ) -> Result { - trace!("initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}"); + trace!( + "initializing new empty InMemoryLayer for writing on timeline {timeline_id} at {start_lsn}" + ); let file = EphemeralFile::create(conf, tenant_shard_id, timeline_id, gate, ctx).await?; let key = InMemoryLayerFileId(file.page_cache_file_id()); @@ -816,8 +819,7 @@ mod tests { #[test] fn test_index_entry() { const MAX_SUPPORTED_POS: usize = IndexEntry::MAX_SUPPORTED_POS; - use 
IndexEntryNewArgs as Args; - use IndexEntryUnpacked as Unpacked; + use {IndexEntryNewArgs as Args, IndexEntryUnpacked as Unpacked}; let roundtrip = |args, expect: Unpacked| { let res = IndexEntry::new(args).expect("this tests expects no errors"); diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs index 1d86015fab..90455fd0ca 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer/vectored_dio_read.rs @@ -1,16 +1,13 @@ -use std::{ - collections::BTreeMap, - sync::{Arc, RwLock}, -}; +use std::collections::BTreeMap; +use std::sync::{Arc, RwLock}; use itertools::Itertools; use tokio_epoll_uring::{BoundedBuf, IoBufMut, Slice}; -use crate::{ - assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}, - context::RequestContext, - virtual_file::{owned_buffers_io::io_buf_aligned::IoBufAlignedMut, IoBufferMut}, -}; +use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}; +use crate::context::RequestContext; +use crate::virtual_file::IoBufferMut; +use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAlignedMut; /// The file interface we require. At runtime, this is a [`crate::tenant::ephemeral_file::EphemeralFile`]. 
pub trait File: Send { @@ -132,7 +129,9 @@ where let req_len = match cur { LogicalReadState::NotStarted(buf) => { if buf.len() != 0 { - panic!("The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`"); + panic!( + "The `LogicalRead`s that are passed in must be freshly created using `LogicalRead::new`" + ); } // buf.cap() == 0 is ok @@ -141,7 +140,9 @@ where *state = LogicalReadState::Ongoing(buf); req_len } - x => panic!("must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}"), + x => panic!( + "must only call with fresh LogicalReads, got another state, leaving Undefined state behind state={x:?}" + ), }; // plan which chunks we need to read from @@ -422,15 +423,15 @@ impl Buffer for Vec { #[cfg(test)] #[allow(clippy::assertions_on_constants)] mod tests { + use std::cell::RefCell; + use std::collections::VecDeque; + use rand::Rng; - use crate::{ - context::DownloadBehavior, task_mgr::TaskKind, - virtual_file::owned_buffers_io::slice::SliceMutExt, - }; - use super::*; - use std::{cell::RefCell, collections::VecDeque}; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + use crate::virtual_file::owned_buffers_io::slice::SliceMutExt; struct InMemoryFile { content: Vec, diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 0bf606cf0a..ae06aca63b 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1,32 +1,32 @@ +use std::ops::Range; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use std::sync::{Arc, Weak}; +use std::time::{Duration, SystemTime}; + use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use pageserver_api::keyspace::KeySpace; use pageserver_api::models::HistoricLayerInfo; use pageserver_api::shard::{ShardIdentity, ShardIndex, TenantShardId}; -use std::ops::Range; -use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; 
-use std::sync::{Arc, Weak}; -use std::time::{Duration, SystemTime}; use tracing::Instrument; +use utils::generation::Generation; use utils::id::TimelineId; use utils::lsn::Lsn; use utils::sync::{gate, heavier_once_cell}; -use crate::config::PageServerConf; -use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::task_mgr::TaskKind; -use crate::tenant::timeline::{CompactionError, GetVectoredError}; -use crate::tenant::{remote_timeline_client::LayerFileMetadata, Timeline}; - use super::delta_layer::{self}; use super::image_layer::{self}; use super::{ AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName, LayerVisibilityHint, PersistentLayerDesc, ValuesReconstructState, }; - -use utils::generation::Generation; +use crate::config::PageServerConf; +use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::task_mgr::TaskKind; +use crate::tenant::Timeline; +use crate::tenant::remote_timeline_client::LayerFileMetadata; +use crate::tenant::timeline::{CompactionError, GetVectoredError}; #[cfg(test)] mod tests; @@ -1873,8 +1873,8 @@ impl ResidentLayer { self.owner.record_access(ctx); let res = match inner { - Delta(ref d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await, - Image(ref i) => image_layer::ImageLayerInner::load_keys(i, ctx).await, + Delta(d) => delta_layer::DeltaLayerInner::load_keys(d, ctx).await, + Image(i) => image_layer::ImageLayerInner::load_keys(i, ctx).await, }; res.with_context(|| format!("Layer index is corrupted for {self}")) } @@ -1920,7 +1920,7 @@ impl ResidentLayer { let owner = &self.owner.0; match self.downloaded.get(owner, ctx).await? 
{ - Delta(ref d) => d + Delta(d) => d .copy_prefix(writer, until, ctx) .await .with_context(|| format!("copy_delta_prefix until {until} of {self}")), @@ -1943,7 +1943,7 @@ impl ResidentLayer { ) -> anyhow::Result<&delta_layer::DeltaLayerInner> { use LayerKind::*; match self.downloaded.get(&self.owner.0, ctx).await? { - Delta(ref d) => Ok(d), + Delta(d) => Ok(d), Image(_) => Err(anyhow::anyhow!("image layer")), } } @@ -1955,7 +1955,7 @@ impl ResidentLayer { ) -> anyhow::Result<&image_layer::ImageLayerInner> { use LayerKind::*; match self.downloaded.get(&self.owner.0, ctx).await? { - Image(ref d) => Ok(d), + Image(d) => Ok(d), Delta(_) => Err(anyhow::anyhow!("delta layer")), } } diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index d93c378ffc..724150d27f 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -1,22 +1,16 @@ use std::time::UNIX_EPOCH; -use pageserver_api::key::{Key, CONTROLFILE_KEY}; +use pageserver_api::key::{CONTROLFILE_KEY, Key}; use tokio::task::JoinSet; -use utils::{ - completion::{self, Completion}, - id::TimelineId, -}; +use utils::completion::{self, Completion}; +use utils::id::TimelineId; use super::failpoints::{Failpoint, FailpointKind}; use super::*; -use crate::{ - context::DownloadBehavior, - tenant::{ - harness::test_img, - storage_layer::{IoConcurrency, LayerVisibilityHint}, - }, -}; -use crate::{task_mgr::TaskKind, tenant::harness::TenantHarness}; +use crate::context::DownloadBehavior; +use crate::task_mgr::TaskKind; +use crate::tenant::harness::{TenantHarness, test_img}; +use crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint}; /// Used in tests to advance a future to wanted await point, and not futher. 
const ADVANCE: std::time::Duration = std::time::Duration::from_secs(3600); @@ -771,10 +765,12 @@ async fn evict_and_wait_does_not_wait_for_download() { let (arrival, _download_arrived) = utils::completion::channel(); layer.enable_failpoint(Failpoint::WaitBeforeDownloading(Some(arrival), barrier)); - let mut download = std::pin::pin!(layer - .0 - .get_or_maybe_download(true, None) - .instrument(download_span)); + let mut download = std::pin::pin!( + layer + .0 + .get_or_maybe_download(true, None) + .instrument(download_span) + ); assert!( !layer.is_likely_resident(), diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index 2097e90764..ed16dcaa0d 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -1,16 +1,15 @@ use core::fmt::Display; -use pageserver_api::shard::TenantShardId; use std::ops::Range; -use utils::{id::TimelineId, lsn::Lsn}; use pageserver_api::key::Key; - -use super::{DeltaLayerName, ImageLayerName, LayerName}; - +use pageserver_api::shard::TenantShardId; use serde::{Deserialize, Serialize}; - #[cfg(test)] use utils::id::TenantId; +use utils::id::TimelineId; +use utils::lsn::Lsn; + +use super::{DeltaLayerName, ImageLayerName, LayerName}; /// A unique identifier of a persistent layer. /// diff --git a/pageserver/src/tenant/storage_layer/layer_name.rs b/pageserver/src/tenant/storage_layer/layer_name.rs index addf3b85d9..0f7995f87b 100644 --- a/pageserver/src/tenant/storage_layer/layer_name.rs +++ b/pageserver/src/tenant/storage_layer/layer_name.rs @@ -1,12 +1,12 @@ //! //! Helper functions for dealing with filenames of the image and delta layer files. //! 
-use pageserver_api::key::Key; use std::cmp::Ordering; use std::fmt; use std::ops::Range; use std::str::FromStr; +use pageserver_api::key::Key; use utils::lsn::Lsn; use super::PersistentLayerDesc; @@ -305,7 +305,7 @@ impl FromStr for LayerName { (None, None) => { return Err(format!( "neither delta nor image layer file name: {value:?}" - )) + )); } (Some(delta), None) => Self::Delta(delta), (None, Some(image)) => Self::Image(image), diff --git a/pageserver/src/tenant/storage_layer/merge_iterator.rs b/pageserver/src/tenant/storage_layer/merge_iterator.rs index 19cfcb0867..76cdddd06a 100644 --- a/pageserver/src/tenant/storage_layer/merge_iterator.rs +++ b/pageserver/src/tenant/storage_layer/merge_iterator.rs @@ -1,21 +1,16 @@ -use std::{ - cmp::Ordering, - collections::{binary_heap, BinaryHeap}, - sync::Arc, -}; +use std::cmp::Ordering; +use std::collections::{BinaryHeap, binary_heap}; +use std::sync::Arc; use anyhow::bail; use pageserver_api::key::Key; +use pageserver_api::value::Value; use utils::lsn::Lsn; +use super::delta_layer::{DeltaLayerInner, DeltaLayerIterator}; +use super::image_layer::{ImageLayerInner, ImageLayerIterator}; +use super::{PersistentLayerDesc, PersistentLayerKey}; use crate::context::RequestContext; -use pageserver_api::value::Value; - -use super::{ - delta_layer::{DeltaLayerInner, DeltaLayerIterator}, - image_layer::{ImageLayerInner, ImageLayerIterator}, - PersistentLayerDesc, PersistentLayerKey, -}; #[derive(Clone, Copy)] pub(crate) enum LayerRef<'a> { @@ -349,24 +344,18 @@ impl<'a> MergeIterator<'a> { #[cfg(test)] mod tests { - use super::*; - use itertools::Itertools; use pageserver_api::key::Key; - use utils::lsn::Lsn; - - use crate::{ - tenant::{ - harness::{TenantHarness, TIMELINE_ID}, - storage_layer::delta_layer::test::{produce_delta_layer, sort_delta}, - }, - DEFAULT_PG_VERSION, - }; - - #[cfg(feature = "testing")] - use crate::tenant::storage_layer::delta_layer::test::sort_delta_value; #[cfg(feature = "testing")] use 
pageserver_api::record::NeonWalRecord; + use utils::lsn::Lsn; + + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + #[cfg(feature = "testing")] + use crate::tenant::storage_layer::delta_layer::test::sort_delta_value; + use crate::tenant::storage_layer::delta_layer::test::{produce_delta_layer, sort_delta}; async fn assert_merge_iter_equal( merge_iter: &mut MergeIterator<'_>, diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index b12655b0f3..670f9ad87f 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -8,24 +8,24 @@ use std::sync::Arc; use std::time::{Duration, Instant}; use once_cell::sync::Lazy; +use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; use rand::Rng; use scopeguard::defer; use tokio::sync::{Semaphore, SemaphorePermit}; use tokio_util::sync::CancellationToken; use tracing::*; - -use crate::context::{DownloadBehavior, RequestContext}; -use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS}; -use crate::task_mgr::{self, TaskKind, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS}; -use crate::tenant::throttle::Stats; -use crate::tenant::timeline::compaction::CompactionOutcome; -use crate::tenant::timeline::CompactionError; -use crate::tenant::{Tenant, TenantState}; -use pageserver_api::config::tenant_conf_defaults::DEFAULT_COMPACTION_PERIOD; use utils::backoff::exponential_backoff_duration; use utils::completion::Barrier; use utils::pausable_failpoint; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::metrics::{self, BackgroundLoopSemaphoreMetricsRecorder, TENANT_TASK_EVENTS}; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TOKIO_WORKER_THREADS, TaskKind}; +use crate::tenant::throttle::Stats; +use crate::tenant::timeline::CompactionError; +use crate::tenant::timeline::compaction::CompactionOutcome; +use crate::tenant::{Tenant, TenantState}; + /// Semaphore limiting 
concurrent background tasks (across all tenants). /// /// We use 3/4 Tokio threads, to avoid blocking all threads in case we do any CPU-heavy work. @@ -287,11 +287,12 @@ fn log_compaction_error( sleep_duration: Duration, task_cancelled: bool, ) { - use crate::pgdatadir_mapping::CollectKeySpaceError; - use crate::tenant::upload_queue::NotInitialized; - use crate::tenant::PageReconstructError; use CompactionError::*; + use crate::pgdatadir_mapping::CollectKeySpaceError; + use crate::tenant::PageReconstructError; + use crate::tenant::upload_queue::NotInitialized; + let level = match err { ShuttingDown => return, Offload(_) => Level::ERROR, diff --git a/pageserver/src/tenant/throttle.rs b/pageserver/src/tenant/throttle.rs index 300d779125..6c37c3771b 100644 --- a/pageserver/src/tenant/throttle.rs +++ b/pageserver/src/tenant/throttle.rs @@ -1,10 +1,6 @@ -use std::{ - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, - time::Instant, -}; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Instant; use arc_swap::ArcSwap; use utils::leaky_bucket::{LeakyBucketConfig, RateLimiter}; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index a80d407d54..cbbcf5d358 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -14,55 +14,6 @@ pub mod span; pub mod uninit; mod walreceiver; -use anyhow::{anyhow, bail, ensure, Context, Result}; -use arc_swap::{ArcSwap, ArcSwapOption}; -use bytes::Bytes; -use camino::Utf8Path; -use chrono::{DateTime, Utc}; -use compaction::{CompactionOutcome, GcCompactionCombinedSettings}; -use enumset::EnumSet; -use fail::fail_point; -use futures::FutureExt; -use futures::{stream::FuturesUnordered, StreamExt}; -use handle::ShardTimelineId; -use layer_manager::Shutdown; -use offload::OffloadError; -use once_cell::sync::Lazy; -use pageserver_api::models::PageTraceEvent; -use pageserver_api::{ - key::{ - KEY_SIZE, METADATA_KEY_BEGIN_PREFIX, 
METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE, - SPARSE_RANGE, - }, - keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}, - models::{ - CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, - DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, - InMemoryLayerInfo, LayerMapInfo, LsnLease, TimelineState, - }, - reltag::BlockNumber, - shard::{ShardIdentity, ShardNumber, TenantShardId}, -}; -use rand::Rng; -use remote_storage::DownloadError; -use serde_with::serde_as; -use storage_broker::BrokerClientChannel; -use tokio::runtime::Handle; -use tokio::sync::mpsc::Sender; -use tokio::sync::{oneshot, watch, Notify}; -use tokio_util::sync::CancellationToken; -use tracing::*; -use utils::critical; -use utils::rate_limit::RateLimit; -use utils::{ - fs_ext, - guard_arc_swap::GuardArcSwap, - pausable_failpoint, - postgres_client::PostgresClientProtocol, - sync::gate::{Gate, GateGuard}, -}; -use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; - use std::array; use std::cmp::{max, min}; use std::collections::btree_map::Entry; @@ -72,74 +23,58 @@ use std::sync::atomic::{AtomicBool, AtomicU64, Ordering as AtomicOrdering}; use std::sync::{Arc, Mutex, OnceLock, RwLock, Weak}; use std::time::{Duration, Instant, SystemTime}; -use crate::l0_flush::{self, L0FlushGlobalState}; -use crate::tenant::storage_layer::ImageLayerName; -use crate::{ - aux_file::AuxFileSizeEstimator, - page_service::TenantManagerTypes, - tenant::{ - config::AttachmentMode, - layer_map::{LayerMap, SearchResult}, - metadata::TimelineMetadata, - storage_layer::{ - inmemory_layer::IndexEntry, BatchLayerWriter, IoConcurrency, PersistentLayerDesc, - ValueReconstructSituation, - }, - }, - walingest::WalLagCooldown, - walredo, -}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - disk_usage_eviction_task::DiskUsageEvictionInfo, - pgdatadir_mapping::CollectKeySpaceError, -}; -use crate::{ - 
disk_usage_eviction_task::finite_f32, - tenant::storage_layer::{ - AsLayerDesc, DeltaLayerWriter, EvictionError, ImageLayerWriter, InMemoryLayer, Layer, - LayerAccessStatsReset, LayerName, ResidentLayer, ValueReconstructState, - ValuesReconstructState, - }, -}; -use crate::{ - disk_usage_eviction_task::EvictionCandidate, tenant::storage_layer::delta_layer::DeltaEntry, -}; -use crate::{ - metrics::ScanLatencyOngoingRecording, tenant::timeline::logical_size::CurrentLogicalSize, -}; -use crate::{ - pgdatadir_mapping::DirectoryKind, - virtual_file::{MaybeFatalIo, VirtualFile}, -}; -use crate::{pgdatadir_mapping::LsnForTimestamp, tenant::tasks::BackgroundLoopKind}; -use crate::{pgdatadir_mapping::MAX_AUX_FILE_V2_DELTAS, tenant::storage_layer::PersistentLayerKey}; +use anyhow::{Context, Result, anyhow, bail, ensure}; +use arc_swap::{ArcSwap, ArcSwapOption}; +use bytes::Bytes; +use camino::Utf8Path; +use chrono::{DateTime, Utc}; +use compaction::{CompactionOutcome, GcCompactionCombinedSettings}; +use enumset::EnumSet; +use fail::fail_point; +use futures::stream::FuturesUnordered; +use futures::{FutureExt, StreamExt}; +use handle::ShardTimelineId; +use layer_manager::Shutdown; +use offload::OffloadError; +use once_cell::sync::Lazy; use pageserver_api::config::tenant_conf_defaults::DEFAULT_PITR_INTERVAL; - -use crate::config::PageServerConf; -use crate::keyspace::{KeyPartitioning, KeySpace}; -use crate::metrics::{TimelineMetrics, DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL}; -use crate::pgdatadir_mapping::{CalculateLogicalSizeError, MetricsUpdate}; -use crate::tenant::config::TenantConfOpt; -use pageserver_api::reltag::RelTag; -use pageserver_api::shard::ShardIndex; - -use postgres_connection::PgConnectionConfig; -use postgres_ffi::{to_pg_timestamp, v14::xlog_utils, WAL_SEGMENT_SIZE}; -use utils::{ - completion, - generation::Generation, - id::TimelineId, - lsn::{AtomicLsn, Lsn, RecordLsn}, - seqwait::SeqWait, - simple_rcu::{Rcu, RcuReadGuard}, +use 
pageserver_api::key::{ + KEY_SIZE, Key, METADATA_KEY_BEGIN_PREFIX, METADATA_KEY_END_PREFIX, NON_INHERITED_RANGE, + SPARSE_RANGE, }; - -use crate::task_mgr; -use crate::task_mgr::TaskKind; -use crate::tenant::gc_result::GcResult; -use crate::ZERO_PAGE; -use pageserver_api::key::Key; +use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}; +use pageserver_api::models::{ + CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, + DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, + InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, TimelineState, +}; +use pageserver_api::reltag::{BlockNumber, RelTag}; +use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId}; +#[cfg(test)] +use pageserver_api::value::Value; +use postgres_connection::PgConnectionConfig; +use postgres_ffi::v14::xlog_utils; +use postgres_ffi::{WAL_SEGMENT_SIZE, to_pg_timestamp}; +use rand::Rng; +use remote_storage::DownloadError; +use serde_with::serde_as; +use storage_broker::BrokerClientChannel; +use tokio::runtime::Handle; +use tokio::sync::mpsc::Sender; +use tokio::sync::{Notify, oneshot, watch}; +use tokio_util::sync::CancellationToken; +use tracing::*; +use utils::generation::Generation; +use utils::guard_arc_swap::GuardArcSwap; +use utils::id::TimelineId; +use utils::lsn::{AtomicLsn, Lsn, RecordLsn}; +use utils::postgres_client::PostgresClientProtocol; +use utils::rate_limit::RateLimit; +use utils::seqwait::SeqWait; +use utils::simple_rcu::{Rcu, RcuReadGuard}; +use utils::sync::gate::{Gate, GateGuard}; +use utils::{completion, critical, fs_ext, pausable_failpoint}; +use wal_decoder::serialized_batch::{SerializedValueBatch, ValueMeta}; use self::delete::DeleteTimelineFlow; pub(super) use self::eviction_task::EvictionTaskTenantState; @@ -147,24 +82,48 @@ use self::eviction_task::EvictionTaskTimelineState; use self::layer_manager::LayerManager; use 
self::logical_size::LogicalSize; use self::walreceiver::{WalReceiver, WalReceiverConf}; - -use super::remote_timeline_client::index::GcCompactionState; +use super::config::TenantConf; +use super::remote_timeline_client::index::{GcCompactionState, IndexPart}; +use super::remote_timeline_client::{RemoteTimelineClient, WaitCompletionError}; +use super::secondary::heatmap::HeatMapLayer; +use super::storage_layer::{LayerFringe, LayerVisibilityHint, ReadableLayer}; +use super::upload_queue::NotInitialized; use super::{ - config::TenantConf, storage_layer::LayerVisibilityHint, upload_queue::NotInitialized, - MaybeOffloaded, + AttachedTenantConf, GcError, HeatMapTimeline, MaybeOffloaded, + debug_assert_current_span_has_tenant_and_timeline_id, }; -use super::{ - debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf, HeatMapTimeline, +use crate::aux_file::AuxFileSizeEstimator; +use crate::config::PageServerConf; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, finite_f32}; +use crate::keyspace::{KeyPartitioning, KeySpace}; +use crate::l0_flush::{self, L0FlushGlobalState}; +use crate::metrics::{ + DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics, }; -use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; -use super::{ - remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, - storage_layer::ReadableLayer, +use crate::page_service::TenantManagerTypes; +use crate::pgdatadir_mapping::{ + CalculateLogicalSizeError, CollectKeySpaceError, DirectoryKind, LsnForTimestamp, + MAX_AUX_FILE_V2_DELTAS, MetricsUpdate, }; -use super::{secondary::heatmap::HeatMapLayer, GcError}; - -#[cfg(test)] -use pageserver_api::value::Value; +use crate::task_mgr::TaskKind; +use crate::tenant::config::{AttachmentMode, TenantConfOpt}; +use crate::tenant::gc_result::GcResult; +use 
crate::tenant::layer_map::{LayerMap, SearchResult}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::storage_layer::delta_layer::DeltaEntry; +use crate::tenant::storage_layer::inmemory_layer::IndexEntry; +use crate::tenant::storage_layer::{ + AsLayerDesc, BatchLayerWriter, DeltaLayerWriter, EvictionError, ImageLayerName, + ImageLayerWriter, InMemoryLayer, IoConcurrency, Layer, LayerAccessStatsReset, LayerName, + PersistentLayerDesc, PersistentLayerKey, ResidentLayer, ValueReconstructSituation, + ValueReconstructState, ValuesReconstructState, +}; +use crate::tenant::tasks::BackgroundLoopKind; +use crate::tenant::timeline::logical_size::CurrentLogicalSize; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; +use crate::walingest::WalLagCooldown; +use crate::{ZERO_PAGE, task_mgr, walredo}; #[derive(Debug, PartialEq, Eq, Clone, Copy)] pub(crate) enum FlushLoopState { @@ -1474,13 +1433,22 @@ impl Timeline { | TaskKind::WalReceiverConnectionHandler | TaskKind::WalReceiverConnectionPoller => { let is_myself = match who_is_waiting { - WaitLsnWaiter::Timeline(waiter) => Weak::ptr_eq(&waiter.myself, &self.myself), - WaitLsnWaiter::Tenant | WaitLsnWaiter::PageService | WaitLsnWaiter::HttpEndpoint => unreachable!("tenant or page_service context are not expected to have task kind {:?}", ctx.task_kind()), + WaitLsnWaiter::Timeline(waiter) => { + Weak::ptr_eq(&waiter.myself, &self.myself) + } + WaitLsnWaiter::Tenant + | WaitLsnWaiter::PageService + | WaitLsnWaiter::HttpEndpoint => unreachable!( + "tenant or page_service context are not expected to have task kind {:?}", + ctx.task_kind() + ), }; if is_myself { if let Err(current) = self.last_record_lsn.would_wait_for(lsn) { // walingest is the only one that can advance last_record_lsn; it should make sure to never reach here - panic!("this timeline's walingest task is calling wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock"); + panic!( + "this timeline's walingest task is calling 
wait_lsn({lsn}) but we only have last_record_lsn={current}; would deadlock" + ); } } else { // if another timeline's is waiting for us, there's no deadlock risk because @@ -1509,12 +1477,12 @@ impl Timeline { drop(_timer); let walreceiver_status = self.walreceiver_status(); Err(WaitLsnError::Timeout(format!( - "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", - lsn, - self.get_last_record_lsn(), - self.get_disk_consistent_lsn(), - walreceiver_status, - ))) + "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", + lsn, + self.get_last_record_lsn(), + self.get_disk_consistent_lsn(), + walreceiver_status, + ))) } } } @@ -1618,10 +1586,18 @@ impl Timeline { if init || validate { let latest_gc_cutoff_lsn = self.get_applied_gc_cutoff_lsn(); if lsn < *latest_gc_cutoff_lsn { - bail!("tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", lsn, *latest_gc_cutoff_lsn); + bail!( + "tried to request an lsn lease for an lsn below the latest gc cutoff. requested at {} gc cutoff {}", + lsn, + *latest_gc_cutoff_lsn + ); } if lsn < planned_cutoff { - bail!("tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", lsn, planned_cutoff); + bail!( + "tried to request an lsn lease for an lsn below the planned gc cutoff. requested at {} planned gc cutoff {}", + lsn, + planned_cutoff + ); } } @@ -1745,7 +1721,9 @@ impl Timeline { // This is not harmful, but it only happens in relatively rare cases where // time-based checkpoints are not happening fast enough to keep the amount of // ephemeral data within configured limits. It's a sign of stress on the system. 
- tracing::info!("Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure"); + tracing::info!( + "Early-rolling open layer at size {current_size} (limit {size_override}) due to dirty data pressure" + ); } } @@ -1871,7 +1849,9 @@ impl Timeline { // Last record Lsn could be zero in case the timeline was just created if !last_record_lsn.is_valid() { - warn!("Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}"); + warn!( + "Skipping compaction for potentially just initialized timeline, it has invalid last record lsn: {last_record_lsn}" + ); return Ok(CompactionOutcome::Skipped); } @@ -2033,7 +2013,9 @@ impl Timeline { // `self.remote_client.shutdown().await` above should have already flushed everything from the queue, but // we also do a final check here to ensure that the queue is empty. if !self.remote_client.no_pending_work() { - warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + warn!( + "still have pending work in remote upload queue, but continuing shutting down anyways" + ); } } } @@ -2042,7 +2024,9 @@ impl Timeline { // drain the upload queue self.remote_client.shutdown().await; if !self.remote_client.no_pending_work() { - warn!("still have pending work in remote upload queue, but continuing shutting down anyways"); + warn!( + "still have pending work in remote upload queue, but continuing shutting down anyways" + ); } } @@ -2946,8 +2930,9 @@ impl Timeline { disk_consistent_lsn: Lsn, index_part: IndexPart, ) -> anyhow::Result<()> { - use init::{Decision::*, Discovered, DismissedLayer}; use LayerName::*; + use init::Decision::*; + use init::{Discovered, DismissedLayer}; let mut guard = self.layers.write().await; @@ -3162,11 +3147,15 @@ impl Timeline { } TimelineState::Loading => { // Import does not return an activated timeline. 
- info!("discarding priority boost for logical size calculation because timeline is not yet active"); + info!( + "discarding priority boost for logical size calculation because timeline is not yet active" + ); } TimelineState::Active => { // activation should be setting the once cell - warn!("unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work"); + warn!( + "unexpected: cancel_wait_for_background_loop_concurrency_limit_semaphore not set, priority-boosting of logical size calculation will not work" + ); debug_assert!(false); } } @@ -4306,10 +4295,14 @@ impl Timeline { // This path is only taken for tenants with multiple shards: single sharded tenants should // never encounter a gap in the wal. let old_disk_consistent_lsn = self.disk_consistent_lsn.load(); - tracing::debug!("Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}"); + tracing::debug!( + "Advancing disk_consistent_lsn across layer gap {old_disk_consistent_lsn}->{frozen_to_lsn}" + ); if self.set_disk_consistent_lsn(frozen_to_lsn) { if let Err(e) = self.schedule_uploads(frozen_to_lsn, vec![]) { - tracing::warn!("Failed to schedule metadata upload after updating disk_consistent_lsn: {e}"); + tracing::warn!( + "Failed to schedule metadata upload after updating disk_consistent_lsn: {e}" + ); } } } @@ -4534,7 +4527,10 @@ impl Timeline { /// This function must only be used from the layer flush task. 
fn set_disk_consistent_lsn(&self, new_value: Lsn) -> bool { let old_value = self.disk_consistent_lsn.fetch_max(new_value); - assert!(new_value >= old_value, "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}"); + assert!( + new_value >= old_value, + "disk_consistent_lsn must be growing monotonously at runtime; current {old_value}, offered {new_value}" + ); self.metrics .disk_consistent_lsn_gauge @@ -4829,7 +4825,9 @@ impl Timeline { // any metadata keys, keys, as that would lead to actual data // loss. if img_key.is_rel_fsm_block_key() || img_key.is_rel_vm_block_key() { - warn!("could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}"); + warn!( + "could not reconstruct FSM or VM key {img_key}, filling with zeros: {err:?}" + ); ZERO_PAGE.clone() } else { return Err(CreateImageLayersError::from(err)); @@ -4908,7 +4906,8 @@ impl Timeline { let trigger_generation = delta_files_accessed as usize >= MAX_AUX_FILE_V2_DELTAS; info!( - "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", elapsed.as_secs_f64() + "metadata key compaction: trigger_generation={trigger_generation}, delta_files_accessed={delta_files_accessed}, total_kb_retrieved={total_kb_retrieved}, total_keys_retrieved={total_keys_retrieved}, read_time={}s", + elapsed.as_secs_f64() ); if !trigger_generation && mode == ImageLayerCreationMode::Try { @@ -5230,7 +5229,8 @@ impl Timeline { if should_yield { tracing::info!( "preempt image layer generation at {lsn} when processing partition {}..{}: too many L0 layers", - partition.start().unwrap(), partition.end().unwrap() + partition.start().unwrap(), + partition.end().unwrap() ); last_partition_processed = Some(partition.clone()); all_generated = false; @@ -5588,7 +5588,9 @@ impl Timeline { // because we have not implemented L0 => L0 
compaction. duplicated_layers.insert(l.layer_desc().key()); } else if LayerMap::is_l0(&l.layer_desc().key_range, l.layer_desc().is_delta) { - return Err(CompactionError::Other(anyhow::anyhow!("compaction generates a L0 layer file as output, which will cause infinite compaction."))); + return Err(CompactionError::Other(anyhow::anyhow!( + "compaction generates a L0 layer file as output, which will cause infinite compaction." + ))); } else { insert_layers.push(l.clone()); } @@ -5712,8 +5714,10 @@ impl Timeline { .await { Ok((index_part, index_generation, _index_mtime)) => { - tracing::info!("GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}", - index_part.metadata.latest_gc_cutoff_lsn()); + tracing::info!( + "GC loaded shard zero metadata (gen {index_generation:?}): latest_gc_cutoff_lsn: {}", + index_part.metadata.latest_gc_cutoff_lsn() + ); Ok(Some(index_part.metadata.latest_gc_cutoff_lsn())) } Err(DownloadError::NotFound) => { @@ -6122,9 +6126,7 @@ impl Timeline { if let Some((img_lsn, img)) = &data.img { trace!( "found page image for key {} at {}, no WAL redo required, req LSN {}", - key, - img_lsn, - request_lsn, + key, img_lsn, request_lsn, ); Ok(img.clone()) } else { @@ -6153,7 +6155,12 @@ impl Timeline { request_lsn ); } else { - trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn); + trace!( + "found {} WAL records that will init the page for {} at {}, performing WAL redo", + data.records.len(), + key, + request_lsn + ); }; let res = self .walredo_mgr @@ -6697,7 +6704,9 @@ impl TimelineWriter<'_> { if let Some(wait_threshold) = wait_threshold { if l0_count >= wait_threshold { - debug!("layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers"); + debug!( + "layer roll waiting for flush due to compaction backpressure at {l0_count} L0 layers" + ); self.tl.wait_flush_completion(flush_id).await?; } } @@ -6884,17 +6893,15 @@ mod 
tests { use pageserver_api::key::Key; use pageserver_api::value::Value; use tracing::Instrument; - use utils::{id::TimelineId, lsn::Lsn}; - - use crate::tenant::{ - harness::{test_img, TenantHarness}, - layer_map::LayerMap, - storage_layer::{Layer, LayerName, LayerVisibilityHint}, - timeline::{DeltaLayerTestDesc, EvictionError}, - PreviousHeatmap, Timeline, - }; + use utils::id::TimelineId; + use utils::lsn::Lsn; use super::HeatMapTimeline; + use crate::tenant::harness::{TenantHarness, test_img}; + use crate::tenant::layer_map::LayerMap; + use crate::tenant::storage_layer::{Layer, LayerName, LayerVisibilityHint}; + use crate::tenant::timeline::{DeltaLayerTestDesc, EvictionError}; + use crate::tenant::{PreviousHeatmap, Timeline}; fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) { assert_eq!(lhs.layers.len(), rhs.layers.len()); diff --git a/pageserver/src/tenant/timeline/analysis.rs b/pageserver/src/tenant/timeline/analysis.rs index 6009b0b79a..96864ec44b 100644 --- a/pageserver/src/tenant/timeline/analysis.rs +++ b/pageserver/src/tenant/timeline/analysis.rs @@ -1,4 +1,5 @@ -use std::{collections::BTreeSet, ops::Range}; +use std::collections::BTreeSet; +use std::ops::Range; use utils::lsn::Lsn; diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index c6ef5165ef..d221bf53d2 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -8,30 +8,35 @@ use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; use std::ops::{Deref, Range}; use std::sync::Arc; -use super::layer_manager::LayerManager; -use super::{ - CompactFlags, CompactOptions, CreateImageLayersError, DurationRecorder, GetVectoredError, - ImageLayerCreationMode, LastImageLayerCreationStatus, PageReconstructError, RecordedDuration, - Timeline, -}; - -use anyhow::{anyhow, bail, Context}; +use anyhow::{Context, anyhow, bail}; use bytes::Bytes; use enumset::EnumSet; 
use fail::fail_point; use itertools::Itertools; use once_cell::sync::Lazy; -use pageserver_api::key::KEY_SIZE; -use pageserver_api::keyspace::ShardedRange; +use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE; +use pageserver_api::key::{KEY_SIZE, Key}; +use pageserver_api::keyspace::{KeySpace, ShardedRange}; use pageserver_api::models::CompactInfoResponse; +use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::{ShardCount, ShardIdentity, TenantShardId}; +use pageserver_api::value::Value; +use pageserver_compaction::helpers::{fully_contains, overlaps_with}; +use pageserver_compaction::interface::*; use serde::Serialize; use tokio::sync::{OwnedSemaphorePermit, Semaphore}; use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, info_span, trace, warn, Instrument}; +use tracing::{Instrument, debug, error, info, info_span, trace, warn}; use utils::critical; use utils::id::TimelineId; +use utils::lsn::Lsn; +use super::layer_manager::LayerManager; +use super::{ + CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder, + GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, PageReconstructError, + RecordedDuration, Timeline, +}; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; use crate::pgdatadir_mapping::CollectKeySpaceError; @@ -39,8 +44,8 @@ use crate::statvfs::Statvfs; use crate::tenant::checks::check_valid_layermap; use crate::tenant::gc_block::GcBlock; use crate::tenant::layer_map::LayerMap; -use crate::tenant::remote_timeline_client::index::GcCompactionState; use crate::tenant::remote_timeline_client::WaitCompletionError; +use crate::tenant::remote_timeline_client::index::GcCompactionState; use crate::tenant::storage_layer::batch_split_writer::{ BatchWriterResult, SplitDeltaLayerWriter, SplitImageLayerWriter, }; @@ -49,24 +54,12 @@ use crate::tenant::storage_layer::merge_iterator::MergeIterator; use 
crate::tenant::storage_layer::{ AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState, }; -use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter}; -use crate::tenant::timeline::{ImageLayerCreationOutcome, IoConcurrency}; -use crate::tenant::timeline::{Layer, ResidentLayer}; -use crate::tenant::{gc_block, DeltaLayer, MaybeOffloaded}; +use crate::tenant::timeline::{ + DeltaLayerWriter, ImageLayerCreationOutcome, ImageLayerWriter, IoConcurrency, Layer, + ResidentLayer, drop_rlock, +}; +use crate::tenant::{DeltaLayer, MaybeOffloaded, gc_block}; use crate::virtual_file::{MaybeFatalIo, VirtualFile}; -use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE; - -use pageserver_api::key::Key; -use pageserver_api::keyspace::KeySpace; -use pageserver_api::record::NeonWalRecord; -use pageserver_api::value::Value; - -use utils::lsn::Lsn; - -use pageserver_compaction::helpers::{fully_contains, overlaps_with}; -use pageserver_compaction::interface::*; - -use super::CompactionError; /// Maximum number of deltas before generating an image layer in bottom-most compaction. 
const COMPACTION_DELTA_THRESHOLD: usize = 5; @@ -282,8 +275,7 @@ impl GcCompactionQueue { if l2_size == 0 && l1_size >= gc_compaction_initial_threshold_kb * 1024 { info!( "trigger auto-compaction because l1_size={} >= gc_compaction_initial_threshold_kb={}", - l1_size, - gc_compaction_initial_threshold_kb + l1_size, gc_compaction_initial_threshold_kb ); return true; } @@ -294,9 +286,7 @@ impl GcCompactionQueue { if l1_size as f64 / l2_size as f64 >= (gc_compaction_ratio_percent as f64 / 100.0) { info!( "trigger auto-compaction because l1_size={} / l2_size={} > gc_compaction_ratio_percent={}", - l1_size, - l2_size, - gc_compaction_ratio_percent + l1_size, l2_size, gc_compaction_ratio_percent ); return true; } @@ -355,7 +345,9 @@ impl GcCompactionQueue { gc_block: &GcBlock, auto: bool, ) -> Result<(), CompactionError> { - info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + info!( + "running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); let jobs = timeline .gc_compaction_split_jobs( GcCompactJob::from_compact_options(options.clone()), @@ -419,7 +411,10 @@ impl GcCompactionQueue { guard.queued.push_front(item); } } - info!("scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs", jobs_len); + info!( + "scheduled enhanced gc bottom-most compaction with sub-compaction, split into {} jobs", + jobs_len + ); } Ok(()) } @@ -433,7 +428,9 @@ impl GcCompactionQueue { timeline: &Arc, ) -> Result { let Ok(_one_op_at_a_time_guard) = self.consumer_lock.try_lock() else { - return Err(CompactionError::AlreadyRunning("cannot run gc-compaction because another gc-compaction is running. This should not happen because we only call this function from the gc-compaction queue.")); + return Err(CompactionError::AlreadyRunning( + "cannot run gc-compaction because another gc-compaction is running. 
This should not happen because we only call this function from the gc-compaction queue.", + )); }; let has_pending_tasks; let Some((id, item)) = ({ @@ -459,9 +456,14 @@ impl GcCompactionQueue { .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) { - warn!("ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", options); + warn!( + "ignoring scheduled compaction task: scheduled task must be gc compaction: {:?}", + options + ); } else if options.sub_compaction { - info!("running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + info!( + "running scheduled enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); self.handle_sub_compaction(id, options, timeline, gc_block, auto) .await?; } else { @@ -964,7 +966,9 @@ impl Timeline { self.upload_new_image_layers(image_layers)?; if let LastImageLayerCreationStatus::Incomplete { .. } = outcome { // Yield and do not do any other kind of compaction. - info!("skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)."); + info!( + "skipping shard ancestor compaction due to pending image layer generation tasks (preempted by L0 compaction)." + ); return Ok(CompactionOutcome::YieldForL0); } } @@ -990,7 +994,7 @@ impl Timeline { Err(err) => error!("could not compact, repartitioning keyspace failed: {err:?}"), }; - let partition_count = self.partitioning.read().0 .0.parts.len(); + let partition_count = self.partitioning.read().0.0.parts.len(); // 4. 
Shard ancestor compaction @@ -1199,7 +1203,7 @@ impl Timeline { Ok(()) => (), Err(WaitCompletionError::NotInitialized(ni)) => return Err(CompactionError::from(ni)), Err(WaitCompletionError::UploadQueueShutDownOrStopped) => { - return Err(CompactionError::ShuttingDown) + return Err(CompactionError::ShuttingDown); } } @@ -1494,7 +1498,7 @@ impl Timeline { let last_record_lsn = self.get_last_record_lsn(); let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128; let min_hole_coverage_size = 3; // TODO: something more flexible? - // min-heap (reserve space for one more element added before eviction) + // min-heap (reserve space for one more element added before eviction) let mut heap: BinaryHeap = BinaryHeap::with_capacity(max_holes + 1); let mut prev: Option = None; @@ -2357,8 +2361,14 @@ impl Timeline { let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */ if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space { - return Err(anyhow!("not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", - available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size)); + return Err(anyhow!( + "not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", + available_space, + allocated_space, + all_layer_size, + remote_layer_size, + all_layer_size + remote_layer_size + )); } Ok(()) } @@ -2397,7 +2407,9 @@ impl Timeline { }; if compact_below_lsn == Lsn::INVALID { - tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction"); + tracing::warn!( + "no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction" + ); return Ok(vec![]); } @@ -2542,7 +2554,9 @@ impl Timeline 
{ let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); if sub_compaction { - info!("running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs"); + info!( + "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" + ); let jobs = self .gc_compaction_split_jobs(job, options.sub_compaction_max_job_size_mb) .await?; @@ -2594,7 +2608,13 @@ impl Timeline { let debug_mode = cfg!(debug_assertions) || cfg!(feature = "testing"); - info!("running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}", compact_key_range.start, compact_key_range.end, compact_lsn_range.start, compact_lsn_range.end); + info!( + "running enhanced gc bottom-most compaction, dry_run={dry_run}, compact_key_range={}..{}, compact_lsn_range={}..{}", + compact_key_range.start, + compact_key_range.end, + compact_lsn_range.start, + compact_lsn_range.end + ); scopeguard::defer! { info!("done enhanced gc bottom-most compaction"); @@ -2623,7 +2643,9 @@ impl Timeline { let mut gc_cutoff = if compact_lsn_range.end == Lsn::MAX { if real_gc_cutoff == Lsn::INVALID { // If the gc_cutoff is not generated yet, we should not compact anything. 
- tracing::warn!("no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction"); + tracing::warn!( + "no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction" + ); return Ok(()); } real_gc_cutoff @@ -2631,7 +2653,10 @@ impl Timeline { compact_lsn_range.end }; if gc_cutoff > real_gc_cutoff { - warn!("provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff", gc_cutoff, real_gc_cutoff); + warn!( + "provided compact_lsn_range.end={} is larger than the real_gc_cutoff={}, using the real gc cutoff", + gc_cutoff, real_gc_cutoff + ); gc_cutoff = real_gc_cutoff; } gc_cutoff @@ -2655,7 +2680,10 @@ impl Timeline { .map(|desc| desc.get_lsn_range().end) .max() else { - info!("no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff); + info!( + "no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", + gc_cutoff + ); return Ok(()); }; // Next, if the user specifies compact_lsn_range.start, we need to filter some layers out. All the layers (strictly) below @@ -2673,7 +2701,10 @@ impl Timeline { .map(|desc| desc.get_lsn_range().start) .min() else { - info!("no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", compact_lsn_range.end); + info!( + "no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", + compact_lsn_range.end + ); return Ok(()); }; // Then, pick all the layers that are below the max_layer_lsn. 
This is to ensure we can pick all single-key @@ -2696,7 +2727,10 @@ impl Timeline { } } if selected_layers.is_empty() { - info!("no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compact_key_range.start, compact_key_range.end); + info!( + "no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", + gc_cutoff, compact_key_range.start, compact_key_range.end + ); return Ok(()); } retain_lsns_below_horizon.sort(); @@ -2778,7 +2812,10 @@ impl Timeline { .map(|layer| layer.layer_desc().layer_name()) .collect_vec(); if let Some(err) = check_valid_layermap(&layer_names) { - bail!("gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err); + bail!( + "gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", + err + ); } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc @@ -3185,7 +3222,10 @@ impl Timeline { // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. 
if let Some(err) = check_valid_layermap(&final_layers) { - bail!("gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err); + bail!( + "gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", + err + ); } // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only @@ -3250,7 +3290,8 @@ impl Timeline { if let Some(to) = compact_to_set.get(&layer.layer_desc().key()) { tracing::info!( "skipping delete {} because found same layer key at different generation {}", - layer, to + layer, + to ); } else { compact_from.push(layer.clone()); diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index f4ae1ea166..7cdc69e55f 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -1,26 +1,26 @@ -use std::{ - ops::{Deref, DerefMut}, - sync::Arc, -}; +use std::ops::{Deref, DerefMut}; +use std::sync::Arc; use anyhow::Context; -use pageserver_api::{models::TimelineState, shard::TenantShardId}; +use pageserver_api::models::TimelineState; +use pageserver_api::shard::TenantShardId; use remote_storage::DownloadError; use tokio::sync::OwnedMutexGuard; -use tracing::{error, info, info_span, instrument, Instrument}; -use utils::{crashsafe, fs_ext, id::TimelineId, pausable_failpoint}; +use tracing::{Instrument, error, info, info_span, instrument}; +use utils::id::TimelineId; +use utils::{crashsafe, fs_ext, pausable_failpoint}; -use crate::{ - config::PageServerConf, - task_mgr::{self, TaskKind}, - tenant::{ - metadata::TimelineMetadata, - remote_timeline_client::{PersistIndexPartWithDeletedFlagError, RemoteTimelineClient}, - CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, - TenantManifestError, Timeline, TimelineOrOffloaded, - }, - 
virtual_file::MaybeFatalIo, +use crate::config::PageServerConf; +use crate::task_mgr::{self, TaskKind}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::{ + PersistIndexPartWithDeletedFlagError, RemoteTimelineClient, }; +use crate::tenant::{ + CreateTimelineCause, DeleteTimelineError, MaybeDeletedIndexPart, Tenant, TenantManifestError, + Timeline, TimelineOrOffloaded, +}; +use crate::virtual_file::MaybeFatalIo; /// Mark timeline as deleted in S3 so we won't pick it up next time /// during attach or pageserver restart. diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index e0084d3eef..c3a7433062 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -1,25 +1,27 @@ -use std::{collections::HashSet, sync::Arc}; +use std::collections::HashSet; +use std::sync::Arc; -use super::{layer_manager::LayerManager, FlushLayerError, Timeline}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - task_mgr::TaskKind, - tenant::{ - remote_timeline_client::index::GcBlockingReason::DetachAncestor, - storage_layer::{ - layer::local_layer_path, AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer, - }, - Tenant, - }, - virtual_file::{MaybeFatalIo, VirtualFile}, -}; use anyhow::Context; use http_utils::error::ApiError; -use pageserver_api::{models::detach_ancestor::AncestorDetached, shard::ShardIdentity}; +use pageserver_api::models::detach_ancestor::AncestorDetached; +use pageserver_api::shard::ShardIdentity; use tokio::sync::Semaphore; use tokio_util::sync::CancellationToken; use tracing::Instrument; -use utils::{completion, generation::Generation, id::TimelineId, lsn::Lsn}; +use utils::completion; +use utils::generation::Generation; +use utils::id::TimelineId; +use utils::lsn::Lsn; + +use super::layer_manager::LayerManager; +use super::{FlushLayerError, Timeline}; +use 
crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::TaskKind; +use crate::tenant::Tenant; +use crate::tenant::remote_timeline_client::index::GcBlockingReason::DetachAncestor; +use crate::tenant::storage_layer::layer::local_layer_path; +use crate::tenant::storage_layer::{AsLayerDesc as _, DeltaLayerWriter, Layer, ResidentLayer}; +use crate::virtual_file::{MaybeFatalIo, VirtualFile}; #[derive(Debug, thiserror::Error)] pub(crate) enum Error { @@ -64,9 +66,10 @@ impl Error { where F: Fn(anyhow::Error) -> Error, { + use remote_storage::TimeoutOrCancel; + use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::upload_queue::NotInitialized; - use remote_storage::TimeoutOrCancel; if e.is::() || TimeoutOrCancel::caused_by_cancel(&e) @@ -780,7 +783,7 @@ pub(super) async fn detach_and_reparent( // TODO: make sure there are no `?` before tenant_reset from after a questionmark from // here. panic!( - "bug: detach_and_reparent called on a timeline which has not been detached or which has no live ancestor" + "bug: detach_and_reparent called on a timeline which has not been detached or which has no live ancestor" ); } }; diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 77c33349e0..187d9f248e 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -13,34 +13,27 @@ //! Items with parentheses are not (yet) touched by this task. //! //! 
See write-up on restart on-demand download spike: -use std::{ - collections::HashMap, - ops::ControlFlow, - sync::Arc, - time::{Duration, SystemTime}, -}; +use std::collections::HashMap; +use std::ops::ControlFlow; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; use pageserver_api::models::{EvictionPolicy, EvictionPolicyLayerAccessThreshold}; use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, info_span, instrument, warn, Instrument}; - -use crate::{ - context::{DownloadBehavior, RequestContext}, - pgdatadir_mapping::CollectKeySpaceError, - task_mgr::{self, TaskKind, BACKGROUND_RUNTIME}, - tenant::{ - size::CalculateSyntheticSizeError, - storage_layer::LayerVisibilityHint, - tasks::{sleep_random, BackgroundLoopKind, BackgroundLoopSemaphorePermit}, - timeline::EvictionError, - LogicalSizeCalculationCause, Tenant, - }, -}; - -use utils::{completion, sync::gate::GateGuard}; +use tracing::{Instrument, debug, info, info_span, instrument, warn}; +use utils::completion; +use utils::sync::gate::GateGuard; use super::Timeline; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::pgdatadir_mapping::CollectKeySpaceError; +use crate::task_mgr::{self, BACKGROUND_RUNTIME, TaskKind}; +use crate::tenant::size::CalculateSyntheticSizeError; +use crate::tenant::storage_layer::LayerVisibilityHint; +use crate::tenant::tasks::{BackgroundLoopKind, BackgroundLoopSemaphorePermit, sleep_random}; +use crate::tenant::timeline::EvictionError; +use crate::tenant::{LogicalSizeCalculationCause, Tenant}; #[derive(Default)] pub struct EvictionTaskTimelineState { diff --git a/pageserver/src/tenant/timeline/handle.rs b/pageserver/src/tenant/timeline/handle.rs index 5b39daaaf8..67fb89c433 100644 --- a/pageserver/src/tenant/timeline/handle.rs +++ b/pageserver/src/tenant/timeline/handle.rs @@ -202,18 +202,13 @@ //! to the parent shard during a shard split. Eventually, the shard split task will //! shut down the parent => case (1). 
-use std::collections::hash_map; -use std::collections::HashMap; -use std::sync::Arc; -use std::sync::Mutex; -use std::sync::Weak; +use std::collections::{HashMap, hash_map}; +use std::sync::{Arc, Mutex, Weak}; use pageserver_api::shard::ShardIdentity; -use tracing::instrument; -use tracing::trace; +use tracing::{instrument, trace}; use utils::id::TimelineId; -use utils::shard::ShardIndex; -use utils::shard::ShardNumber; +use utils::shard::{ShardIndex, ShardNumber}; use crate::tenant::mgr::ShardSelector; @@ -631,12 +626,10 @@ impl HandleInner { mod tests { use std::sync::Weak; - use pageserver_api::{ - key::{rel_block_to_key, Key, DBDIR_KEY}, - models::ShardParameters, - reltag::RelTag, - shard::ShardStripeSize, - }; + use pageserver_api::key::{DBDIR_KEY, Key, rel_block_to_key}; + use pageserver_api::models::ShardParameters; + use pageserver_api::reltag::RelTag; + use pageserver_api::shard::ShardStripeSize; use utils::shard::ShardCount; use super::*; diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 0ba9753e85..27243ba378 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -3,9 +3,10 @@ //! Provides utilities to spawn and abort a background task where the downloads happen. //! See /v1/tenant/:tenant_shard_id/timeline/:timeline_id/download_heatmap_layers. 
+use std::sync::{Arc, Mutex}; + use futures::StreamExt; use http_utils::error::ApiError; -use std::sync::{Arc, Mutex}; use tokio_util::sync::CancellationToken; use utils::sync::gate::Gate; diff --git a/pageserver/src/tenant/timeline/import_pgdata.rs b/pageserver/src/tenant/timeline/import_pgdata.rs index 6940179ae9..8b94a114d6 100644 --- a/pageserver/src/tenant/timeline/import_pgdata.rs +++ b/pageserver/src/tenant/timeline/import_pgdata.rs @@ -1,14 +1,14 @@ use std::sync::Arc; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use remote_storage::RemotePath; use tokio_util::sync::CancellationToken; -use tracing::{info, info_span, Instrument}; +use tracing::{Instrument, info, info_span}; use utils::lsn::Lsn; -use crate::{context::RequestContext, tenant::metadata::TimelineMetadata}; - use super::Timeline; +use crate::context::RequestContext; +use crate::tenant::metadata::TimelineMetadata; mod flow; mod importbucket_client; diff --git a/pageserver/src/tenant/timeline/import_pgdata/flow.rs b/pageserver/src/tenant/timeline/import_pgdata/flow.rs index 4388072606..3ef82b3658 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/flow.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/flow.rs @@ -28,52 +28,38 @@ //! An incomplete set of TODOs from the Hackathon: //! 
- version-specific CheckPointData (=> pgv abstraction, already exists for regular walingest) +use std::collections::HashSet; +use std::ops::Range; use std::sync::Arc; use anyhow::{bail, ensure}; use bytes::Bytes; - use itertools::Itertools; -use pageserver_api::{ - key::{rel_block_to_key, rel_dir_to_key, rel_size_to_key, relmap_file_key, DBDIR_KEY}, - reltag::RelTag, - shard::ShardIdentity, -}; -use postgres_ffi::{pg_constants, relfile_utils::parse_relfilename, BLCKSZ}; -use tokio::task::JoinSet; -use tracing::{debug, info_span, instrument, Instrument}; - -use crate::{ - assert_u64_eq_usize::UsizeIsU64, - pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory}, -}; -use crate::{ - context::{DownloadBehavior, RequestContext}, - pgdatadir_mapping::{DbDirectory, RelDirectory}, - task_mgr::TaskKind, - tenant::storage_layer::{ImageLayerWriter, Layer}, -}; - -use pageserver_api::key::Key; use pageserver_api::key::{ - slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, CHECKPOINT_KEY, CONTROLFILE_KEY, - TWOPHASEDIR_KEY, + CHECKPOINT_KEY, CONTROLFILE_KEY, DBDIR_KEY, Key, TWOPHASEDIR_KEY, rel_block_to_key, + rel_dir_to_key, rel_size_to_key, relmap_file_key, slru_block_to_key, slru_dir_to_key, + slru_segment_size_to_key, }; -use pageserver_api::keyspace::singleton_range; -use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range}; -use pageserver_api::reltag::SlruKind; +use pageserver_api::keyspace::{contiguous_range_len, is_contiguous_range, singleton_range}; +use pageserver_api::reltag::{RelTag, SlruKind}; +use pageserver_api::shard::ShardIdentity; +use postgres_ffi::relfile_utils::parse_relfilename; +use postgres_ffi::{BLCKSZ, pg_constants}; +use remote_storage::RemotePath; +use tokio::task::JoinSet; +use tracing::{Instrument, debug, info_span, instrument}; use utils::bin_ser::BeSer; use utils::lsn::Lsn; -use std::collections::HashSet; -use std::ops::Range; - -use super::{ - importbucket_client::{ControlFile, RemoteStorageWrapper}, - 
Timeline, +use super::Timeline; +use super::importbucket_client::{ControlFile, RemoteStorageWrapper}; +use crate::assert_u64_eq_usize::UsizeIsU64; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::pgdatadir_mapping::{ + DbDirectory, RelDirectory, SlruSegmentDirectory, TwoPhaseDirectory, }; - -use remote_storage::RemotePath; +use crate::task_mgr::TaskKind; +use crate::tenant::storage_layer::{ImageLayerWriter, Layer}; pub async fn run( timeline: Arc, diff --git a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs index 68937e535d..a17a10d56b 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/importbucket_client.rs @@ -1,4 +1,5 @@ -use std::{ops::Bound, sync::Arc}; +use std::ops::Bound; +use std::sync::Arc; use anyhow::Context; use bytes::Bytes; @@ -12,9 +13,9 @@ use tokio_util::sync::CancellationToken; use tracing::{debug, info, instrument}; use utils::lsn::Lsn; -use crate::{assert_u64_eq_usize::U64IsUsize, config::PageServerConf}; - use super::{importbucket_format, index_part_format}; +use crate::assert_u64_eq_usize::U64IsUsize; +use crate::config::PageServerConf; pub async fn new( conf: &'static PageServerConf, diff --git a/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs b/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs index 310d97a6a9..ea7a41b25f 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/index_part_format.rs @@ -1,7 +1,6 @@ -use serde::{Deserialize, Serialize}; - #[cfg(feature = "testing")] use camino::Utf8PathBuf; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] pub enum Root { diff --git a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs 
b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs index c5210f9a30..7c7a4de2fc 100644 --- a/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs +++ b/pageserver/src/tenant/timeline/import_pgdata/upcall_api.rs @@ -1,13 +1,12 @@ //! FIXME: most of this is copy-paste from mgmt_api.rs ; dedupe into a `reqwest_utils::Client` crate. use pageserver_client::mgmt_api::{Error, ResponseErrorMessageExt}; +use reqwest::Method; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; use tracing::error; -use crate::config::PageServerConf; -use reqwest::Method; - use super::importbucket_format::Spec; +use crate::config::PageServerConf; pub struct Client { base_url: String, diff --git a/pageserver/src/tenant/timeline/init.rs b/pageserver/src/tenant/timeline/init.rs index 6634d07a0d..e952df0845 100644 --- a/pageserver/src/tenant/timeline/init.rs +++ b/pageserver/src/tenant/timeline/init.rs @@ -1,22 +1,16 @@ -use crate::{ - is_temporary, - tenant::{ - ephemeral_file::is_ephemeral_file, - remote_timeline_client::{ - self, - index::{IndexPart, LayerFileMetadata}, - }, - storage_layer::LayerName, - }, -}; +use std::collections::{HashMap, hash_map}; +use std::str::FromStr; + use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; -use std::{ - collections::{hash_map, HashMap}, - str::FromStr, -}; use utils::lsn::Lsn; +use crate::is_temporary; +use crate::tenant::ephemeral_file::is_ephemeral_file; +use crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata}; +use crate::tenant::remote_timeline_client::{self}; +use crate::tenant::storage_layer::LayerName; + /// Identified files in the timeline directory. 
pub(super) enum Discovered { /// The only one we care about diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index 60e36a5d4d..e552ea83de 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -1,27 +1,22 @@ -use anyhow::{bail, ensure, Context}; +use std::collections::HashMap; +use std::sync::Arc; + +use anyhow::{Context, bail, ensure}; use itertools::Itertools; use pageserver_api::shard::TenantShardId; -use std::{collections::HashMap, sync::Arc}; use tracing::trace; -use utils::{ - id::TimelineId, - lsn::{AtomicLsn, Lsn}, -}; - -use crate::{ - config::PageServerConf, - context::RequestContext, - metrics::TimelineMetrics, - tenant::{ - layer_map::{BatchedUpdates, LayerMap}, - storage_layer::{ - AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc, - PersistentLayerKey, ResidentLayer, - }, - }, -}; +use utils::id::TimelineId; +use utils::lsn::{AtomicLsn, Lsn}; use super::TimelineWriterState; +use crate::config::PageServerConf; +use crate::context::RequestContext; +use crate::metrics::TimelineMetrics; +use crate::tenant::layer_map::{BatchedUpdates, LayerMap}; +use crate::tenant::storage_layer::{ + AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc, + PersistentLayerKey, ResidentLayer, +}; /// Provides semantic APIs to manipulate the layer map. 
pub(crate) enum LayerManager { @@ -214,9 +209,7 @@ impl OpenLayerManager { trace!( "creating in-memory layer at {}/{} for record at {}", - timeline_id, - start_lsn, - lsn + timeline_id, start_lsn, lsn ); let new_layer = diff --git a/pageserver/src/tenant/timeline/logical_size.rs b/pageserver/src/tenant/timeline/logical_size.rs index f4a4eea54a..397037ca9f 100644 --- a/pageserver/src/tenant/timeline/logical_size.rs +++ b/pageserver/src/tenant/timeline/logical_size.rs @@ -1,11 +1,10 @@ -use anyhow::Context; +use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; +use anyhow::Context; use once_cell::sync::OnceCell; use tokio_util::sync::CancellationToken; use utils::lsn::Lsn; -use std::sync::atomic::{AtomicBool, AtomicI64, Ordering as AtomicOrdering}; - /// Internal structure to hold all data needed for logical size calculation. /// /// Calculation consists of two stages: diff --git a/pageserver/src/tenant/timeline/offload.rs b/pageserver/src/tenant/timeline/offload.rs index 424a75005d..43ffaa6aab 100644 --- a/pageserver/src/tenant/timeline/offload.rs +++ b/pageserver/src/tenant/timeline/offload.rs @@ -2,11 +2,11 @@ use std::sync::Arc; use pageserver_api::models::{TenantState, TimelineState}; -use super::delete::{delete_local_timeline_directory, DeletionGuard}; use super::Timeline; +use super::delete::{DeletionGuard, delete_local_timeline_directory}; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::tenant::remote_timeline_client::ShutdownIfArchivedError; -use crate::tenant::timeline::delete::{make_timeline_delete_guard, TimelineDeleteGuardKind}; +use crate::tenant::timeline::delete::{TimelineDeleteGuardKind, make_timeline_delete_guard}; use crate::tenant::{ DeleteTimelineError, OffloadedTimeline, Tenant, TenantManifestError, TimelineOrOffloaded, }; diff --git a/pageserver/src/tenant/timeline/uninit.rs b/pageserver/src/tenant/timeline/uninit.rs index 3074463384..f66c0ffa0f 100644 --- 
a/pageserver/src/tenant/timeline/uninit.rs +++ b/pageserver/src/tenant/timeline/uninit.rs @@ -1,18 +1,21 @@ -use std::{collections::hash_map::Entry, fs, future::Future, sync::Arc}; +use std::collections::hash_map::Entry; +use std::fs; +use std::future::Future; +use std::sync::Arc; use anyhow::Context; use camino::Utf8PathBuf; use tracing::{error, info, info_span}; -use utils::{fs_ext, id::TimelineId, lsn::Lsn, sync::gate::GateGuard}; - -use crate::{ - context::RequestContext, - import_datadir, - span::debug_assert_current_span_has_tenant_and_timeline_id, - tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}, -}; +use utils::fs_ext; +use utils::id::TimelineId; +use utils::lsn::Lsn; +use utils::sync::gate::GateGuard; use super::Timeline; +use crate::context::RequestContext; +use crate::import_datadir; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::{CreateTimelineError, CreateTimelineIdempotency, Tenant, TimelineOrOffloaded}; /// A timeline with some of its files on disk, being initialized. /// This struct ensures the atomicity of the timeline init: it's either properly created and inserted into pageserver's memory, or @@ -128,7 +131,7 @@ impl<'t> UninitializedTimeline<'t> { // We do not call Self::abort here. Because we don't cleanly shut down our Timeline, [`Self::drop`] should // skip trying to delete the timeline directory too. 
anyhow::bail!( - "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" + "Found freshly initialized timeline {tenant_shard_id}/{timeline_id} in the tenant map" ) } Entry::Vacant(v) => { diff --git a/pageserver/src/tenant/timeline/walreceiver.rs b/pageserver/src/tenant/timeline/walreceiver.rs index 67429bff98..4f80073cc3 100644 --- a/pageserver/src/tenant/timeline/walreceiver.rs +++ b/pageserver/src/tenant/timeline/walreceiver.rs @@ -23,17 +23,11 @@ mod connection_manager; mod walreceiver_connection; -use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; -use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::timeline::walreceiver::connection_manager::{ - connection_manager_loop_step, ConnectionManagerState, -}; - use std::future::Future; use std::num::NonZeroU64; use std::sync::Arc; use std::time::Duration; + use storage_broker::BrokerClientChannel; use tokio::sync::watch; use tokio_util::sync::CancellationToken; @@ -41,8 +35,13 @@ use tracing::*; use utils::postgres_client::PostgresClientProtocol; use self::connection_manager::ConnectionManagerStatus; - use super::Timeline; +use crate::context::{DownloadBehavior, RequestContext}; +use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::timeline::walreceiver::connection_manager::{ + ConnectionManagerState, connection_manager_loop_step, +}; #[derive(Clone)] pub struct WalReceiverConf { diff --git a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs index 1955345315..df2663f6bb 100644 --- a/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs +++ b/pageserver/src/tenant/timeline/walreceiver/connection_manager.rs @@ -9,45 +9,42 @@ //! then a (re)connection happens, if necessary. //! 
Only WAL streaming task expects to be finished, other loops (storage broker, connection management) never exit unless cancelled explicitly via the dedicated channel. -use std::{collections::HashMap, num::NonZeroU64, ops::ControlFlow, sync::Arc, time::Duration}; +use std::collections::HashMap; +use std::num::NonZeroU64; +use std::ops::ControlFlow; +use std::sync::Arc; +use std::time::Duration; -use super::{TaskStateUpdate, WalReceiverConf}; +use anyhow::Context; +use chrono::{NaiveDateTime, Utc}; +use pageserver_api::models::TimelineState; +use postgres_connection::PgConnectionConfig; +use storage_broker::proto::{ + FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse, + SubscribeByFilterRequest, TenantTimelineId as ProtoTenantTimelineId, TypeSubscription, + TypedMessage, +}; +use storage_broker::{BrokerClientChannel, Code, Streaming}; +use tokio_util::sync::CancellationToken; +use tracing::*; +use utils::backoff::{ + DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, exponential_backoff, +}; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; +use utils::postgres_client::{ + ConnectionConfigArgs, PostgresClientProtocol, wal_stream_connection_config, +}; + +use super::walreceiver_connection::{WalConnectionStatus, WalReceiverError}; +use super::{TaskEvent, TaskHandle, TaskStateUpdate, WalReceiverConf}; use crate::context::{DownloadBehavior, RequestContext}; use crate::metrics::{ WALRECEIVER_ACTIVE_MANAGERS, WALRECEIVER_BROKER_UPDATES, WALRECEIVER_CANDIDATES_ADDED, WALRECEIVER_CANDIDATES_REMOVED, WALRECEIVER_SWITCHES, }; use crate::task_mgr::TaskKind; -use crate::tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline}; -use anyhow::Context; -use chrono::{NaiveDateTime, Utc}; -use pageserver_api::models::TimelineState; - -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; -use storage_broker::proto::{ - FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, 
SafekeeperDiscoveryResponse, - SubscribeByFilterRequest, TypeSubscription, TypedMessage, -}; -use storage_broker::{BrokerClientChannel, Code, Streaming}; -use tokio_util::sync::CancellationToken; -use tracing::*; - -use postgres_connection::PgConnectionConfig; -use utils::backoff::{ - exponential_backoff, DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS, -}; -use utils::postgres_client::{ - wal_stream_connection_config, ConnectionConfigArgs, PostgresClientProtocol, -}; -use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, -}; - -use super::{ - walreceiver_connection::WalConnectionStatus, walreceiver_connection::WalReceiverError, - TaskEvent, TaskHandle, -}; +use crate::tenant::{Timeline, debug_assert_current_span_has_tenant_and_timeline_id}; pub(crate) struct Cancelled; @@ -349,7 +346,9 @@ async fn subscribe_for_timeline_updates( Err(e) => { // Safekeeper nodes can stop pushing timeline updates to the broker, when no new writes happen and // entire WAL is streamed. Keep this noticeable with logging, but do not warn/error. 
- info!("Attempt #{attempt}, failed to subscribe for timeline {id} updates in broker: {e:#}"); + info!( + "Attempt #{attempt}, failed to subscribe for timeline {id} updates in broker: {e:#}" + ); continue; } } @@ -512,11 +511,11 @@ impl ConnectionManagerState { fn spawn( &self, task: impl FnOnce( - tokio::sync::watch::Sender>, - CancellationToken, - ) -> Fut - + Send - + 'static, + tokio::sync::watch::Sender>, + CancellationToken, + ) -> Fut + + Send + + 'static, ) -> TaskHandle where Fut: std::future::Future> + Send, @@ -880,8 +879,7 @@ impl ConnectionManagerState { discovered_new_wal = if candidate_commit_lsn > current_commit_lsn { trace!( "New candidate has commit_lsn {}, higher than current_commit_lsn {}", - candidate_commit_lsn, - current_commit_lsn + candidate_commit_lsn, current_commit_lsn ); Some(NewCommittedWAL { lsn: candidate_commit_lsn, @@ -1048,7 +1046,9 @@ impl ConnectionManagerState { if !node_ids_to_remove.is_empty() { for node_id in node_ids_to_remove { - info!("Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections"); + info!( + "Safekeeper node {node_id} did not send events for over {lagging_wal_timeout:?}, not retrying the connections" + ); self.wal_connection_retries.remove(&node_id); WALRECEIVER_CANDIDATES_REMOVED.inc(); } @@ -1119,11 +1119,12 @@ impl ReconnectReason { #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::{TenantHarness, TIMELINE_ID}; use pageserver_api::config::defaults::DEFAULT_WAL_RECEIVER_PROTOCOL; use url::Host; + use super::*; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + fn dummy_broker_sk_timeline( commit_lsn: u64, safekeeper_connstr: &str, diff --git a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs index bb34a181da..f41a9cfe82 100644 --- a/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs +++ 
b/pageserver/src/tenant/timeline/walreceiver/walreceiver_connection.rs @@ -1,46 +1,48 @@ //! Actual Postgres connection handler to stream WAL to the server. -use std::{ - error::Error, - pin::pin, - str::FromStr, - sync::Arc, - time::{Duration, SystemTime}, -}; +use std::error::Error; +use std::pin::pin; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bytes::BytesMut; use chrono::{NaiveDateTime, Utc}; use fail::fail_point; use futures::StreamExt; -use postgres_ffi::WAL_SEGMENT_SIZE; -use postgres_ffi::{v14::xlog_utils::normalize_lsn, waldecoder::WalDecodeError}; -use postgres_protocol::message::backend::ReplicationMessage; -use postgres_types::PgLsn; -use tokio::{select, sync::watch, time}; -use tokio_postgres::{error::SqlState, SimpleQueryMessage, SimpleQueryRow}; -use tokio_postgres::{replication::ReplicationStream, Client}; -use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, trace, warn, Instrument}; -use wal_decoder::{ - models::{FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords}, - wire_format::FromWireFormat, -}; - -use super::TaskStateUpdate; -use crate::{ - context::RequestContext, - metrics::{LIVE_CONNECTIONS, WALRECEIVER_STARTED_CONNECTIONS, WAL_INGEST}, - pgdatadir_mapping::DatadirModification, - task_mgr::{TaskKind, WALRECEIVER_RUNTIME}, - tenant::{debug_assert_current_span_has_tenant_and_timeline_id, Timeline, WalReceiverInfo}, - walingest::WalIngest, -}; use postgres_backend::is_expected_io_error; use postgres_connection::PgConnectionConfig; -use postgres_ffi::waldecoder::WalStreamDecoder; -use utils::{critical, id::NodeId, lsn::Lsn, postgres_client::PostgresClientProtocol}; -use utils::{pageserver_feedback::PageserverFeedback, sync::gate::GateError}; +use postgres_ffi::WAL_SEGMENT_SIZE; +use postgres_ffi::v14::xlog_utils::normalize_lsn; +use postgres_ffi::waldecoder::{WalDecodeError, WalStreamDecoder}; 
+use postgres_protocol::message::backend::ReplicationMessage; +use postgres_types::PgLsn; +use tokio::sync::watch; +use tokio::{select, time}; +use tokio_postgres::error::SqlState; +use tokio_postgres::replication::ReplicationStream; +use tokio_postgres::{Client, SimpleQueryMessage, SimpleQueryRow}; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, debug, error, info, trace, warn}; +use utils::critical; +use utils::id::NodeId; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; +use utils::postgres_client::PostgresClientProtocol; +use utils::sync::gate::GateError; +use wal_decoder::models::{FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords}; +use wal_decoder::wire_format::FromWireFormat; + +use super::TaskStateUpdate; +use crate::context::RequestContext; +use crate::metrics::{LIVE_CONNECTIONS, WAL_INGEST, WALRECEIVER_STARTED_CONNECTIONS}; +use crate::pgdatadir_mapping::DatadirModification; +use crate::task_mgr::{TaskKind, WALRECEIVER_RUNTIME}; +use crate::tenant::{ + Timeline, WalReceiverInfo, debug_assert_current_span_has_tenant_and_timeline_id, +}; +use crate::walingest::WalIngest; /// Status of the connection. #[derive(Debug, Clone, Copy)] @@ -149,7 +151,9 @@ pub(super) async fn handle_walreceiver_connection( // Timing out to connect to a safekeeper node could happen long time, due to // many reasons that pageserver cannot control. // Do not produce an error, but make it visible, that timeouts happen by logging the `event. 
- info!("Timed out while waiting {connect_timeout:?} for walreceiver connection to open"); + info!( + "Timed out while waiting {connect_timeout:?} for walreceiver connection to open" + ); return Ok(()); } } @@ -166,7 +170,9 @@ pub(super) async fn handle_walreceiver_connection( node: safekeeper_node, }; if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) { - warn!("Wal connection event listener dropped right after connection init, aborting the connection: {e}"); + warn!( + "Wal connection event listener dropped right after connection init, aborting the connection: {e}" + ); return Ok(()); } @@ -227,7 +233,9 @@ pub(super) async fn handle_walreceiver_connection( connection_status.latest_wal_update = Utc::now().naive_utc(); connection_status.commit_lsn = Some(end_of_wal); if let Err(e) = events_sender.send(TaskStateUpdate::Progress(connection_status)) { - warn!("Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}"); + warn!( + "Wal connection event listener dropped after IDENTIFY_SYSTEM, aborting the connection: {e}" + ); return Ok(()); } @@ -254,7 +262,9 @@ pub(super) async fn handle_walreceiver_connection( // to the safekeepers. startpoint = normalize_lsn(startpoint, WAL_SEGMENT_SIZE); - info!("last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}..."); + info!( + "last_record_lsn {last_rec_lsn} starting replication from {startpoint}, safekeeper is at {end_of_wal}..." 
+ ); let query = format!("START_REPLICATION PHYSICAL {startpoint}"); @@ -626,7 +636,9 @@ pub(super) async fn handle_walreceiver_connection( let timestamp = keepalive.timestamp(); let reply_requested = keepalive.reply() != 0; - trace!("received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})"); + trace!( + "received PrimaryKeepAlive(wal_end: {wal_end}, timestamp: {timestamp:?} reply: {reply_requested})" + ); if reply_requested { Some(last_rec_lsn) diff --git a/pageserver/src/tenant/upload_queue.rs b/pageserver/src/tenant/upload_queue.rs index d302205ffe..d5dc9666ce 100644 --- a/pageserver/src/tenant/upload_queue.rs +++ b/pageserver/src/tenant/upload_queue.rs @@ -1,21 +1,18 @@ use std::collections::{HashMap, HashSet, VecDeque}; use std::fmt::Debug; -use std::sync::atomic::AtomicU32; use std::sync::Arc; - -use super::remote_timeline_client::is_same_remote_layer_path; -use super::storage_layer::AsLayerDesc as _; -use super::storage_layer::LayerName; -use super::storage_layer::ResidentLayer; -use crate::tenant::metadata::TimelineMetadata; -use crate::tenant::remote_timeline_client::index::IndexPart; -use crate::tenant::remote_timeline_client::index::LayerFileMetadata; -use utils::generation::Generation; -use utils::lsn::{AtomicLsn, Lsn}; +use std::sync::atomic::AtomicU32; use chrono::NaiveDateTime; use once_cell::sync::Lazy; use tracing::info; +use utils::generation::Generation; +use utils::lsn::{AtomicLsn, Lsn}; + +use super::remote_timeline_client::is_same_remote_layer_path; +use super::storage_layer::{AsLayerDesc as _, LayerName, ResidentLayer}; +use crate::tenant::metadata::TimelineMetadata; +use crate::tenant::remote_timeline_client::index::{IndexPart, LayerFileMetadata}; /// Kill switch for upload queue reordering in case it causes problems. /// TODO: remove this once we have confidence in it. @@ -225,7 +222,7 @@ impl UploadQueueInitialized { // most one of them can be an index upload (enforced by can_bypass). 
.scan(&self.clean.0, |next_active_index, op| { let active_index = *next_active_index; - if let UploadOp::UploadMetadata { ref uploaded } = op { + if let UploadOp::UploadMetadata { uploaded } = op { *next_active_index = uploaded; // stash index for next operation after this } Some((op, active_index)) @@ -562,16 +559,18 @@ impl UploadOp { #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::{TenantHarness, TIMELINE_ID}; - use crate::tenant::storage_layer::layer::local_layer_path; - use crate::tenant::storage_layer::Layer; - use crate::tenant::Timeline; - use crate::DEFAULT_PG_VERSION; - use itertools::Itertools as _; use std::str::FromStr as _; + + use itertools::Itertools as _; use utils::shard::{ShardCount, ShardIndex, ShardNumber}; + use super::*; + use crate::DEFAULT_PG_VERSION; + use crate::tenant::Timeline; + use crate::tenant::harness::{TIMELINE_ID, TenantHarness}; + use crate::tenant::storage_layer::Layer; + use crate::tenant::storage_layer::layer::local_layer_path; + /// Test helper which asserts that two operations are the same, in lieu of UploadOp PartialEq. 
#[track_caller] fn assert_same_op(a: &UploadOp, b: &UploadOp) { @@ -690,10 +689,22 @@ mod tests { let tli = make_timeline(); let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let (barrier, _) = tokio::sync::watch::channel(()); // Enqueue non-conflicting upload, delete, and index before and after a barrier. @@ -757,10 +768,22 @@ mod tests { let tli = make_timeline(); // Enqueue a bunch of deletes, some with conflicting names. 
- let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::Delete(Delete { @@ -802,9 +825,21 @@ mod tests { let tli = make_timeline(); // Enqueue three versions of the same layer, with different file sizes. 
- let layer0a = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 1); - let layer0b = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 2); - let layer0c = make_layer_with_size(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", 3); + let layer0a = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 1, + ); + let layer0b = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 2, + ); + let layer0c = make_layer_with_size( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + 3, + ); let ops = [ UploadOp::UploadLayer(layer0a.clone(), layer0a.metadata(), None), @@ -836,8 +871,14 @@ mod tests { // Enqueue two layer uploads, with a delete of both layers in between them. These should be // scheduled one at a time, since deletes can't bypass uploads and vice versa. 
- let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -878,10 +919,22 @@ mod tests { // // Also enqueue non-conflicting uploads and deletes at the end. These can bypass the queue // and run immediately. - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + 
"300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -916,9 +969,18 @@ mod tests { let tli = make_timeline(); // Enqueue three different layer uploads. - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), @@ -981,11 +1043,20 @@ mod tests { // Enqueue three uploads of the current empty index. 
let index = Box::new(queue.clean.0.clone()); - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index0 = index_with(&index, &layer0); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index1 = index_with(&index0, &layer1); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index2 = index_with(&index1, &layer2); let ops = [ @@ -1045,7 +1116,10 @@ mod tests { let tli = make_timeline(); // Create a layer to upload. - let layer = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let index_upload = index_with(&queue.clean.0, &layer); // Remove the layer reference in a new index, then delete the layer. @@ -1090,7 +1164,10 @@ mod tests { let tli = make_timeline(); // Create a layer to upload. 
- let layer = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); // Upload the layer. Then dereference the layer, and upload/reference it again. let index_upload = index_with(&queue.clean.0, &layer); @@ -1138,10 +1215,22 @@ mod tests { let tli = make_timeline(); let index = Box::new(queue.clean.0.clone()); // empty, doesn't matter - let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); // Enqueue non-conflicting upload, delete, and index before and after a shutdown. let ops = [ @@ -1197,10 +1286,22 @@ mod tests { let tli = make_timeline(); // Enqueue a bunch of uploads. 
- let layer0 = make_layer(&tli, "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer1 = make_layer(&tli, "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer2 = make_layer(&tli, "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); - let layer3 = make_layer(&tli, "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51"); + let layer0 = make_layer( + &tli, + "000000000000000000000000000000000000-100000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer1 = make_layer( + &tli, + "100000000000000000000000000000000000-200000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer2 = make_layer( + &tli, + "200000000000000000000000000000000000-300000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); + let layer3 = make_layer( + &tli, + "300000000000000000000000000000000000-400000000000000000000000000000000000__00000000016B59D8-00000000016B5A51", + ); let ops = [ UploadOp::UploadLayer(layer0.clone(), layer0.metadata(), None), diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index 47fb4a276b..dcf17a376c 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -27,8 +27,7 @@ use utils::vec_map::VecMap; use crate::context::RequestContext; use crate::tenant::blob_io::{BYTE_UNCOMPRESSED, BYTE_ZSTD, LEN_COMPRESSION_BIT_MASK}; -use crate::virtual_file::IoBufferMut; -use crate::virtual_file::{self, VirtualFile}; +use crate::virtual_file::{self, IoBufferMut, VirtualFile}; /// Metadata bundled with the start and end offset of a blob. 
#[derive(Copy, Clone, Debug)] @@ -139,7 +138,10 @@ impl VectoredBlob { bits => { let error = std::io::Error::new( std::io::ErrorKind::InvalidData, - format!("Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", self.meta.key, self.meta.lsn, self.start, self.end), + format!( + "Failed to decompress blob for {}@{}, {}..{}: invalid compression byte {bits:x}", + self.meta.key, self.meta.lsn, self.start, self.end + ), ); Err(error) } @@ -677,13 +679,12 @@ impl StreamingVectoredReadPlanner { mod tests { use anyhow::Error; + use super::super::blob_io::tests::{random_array, write_maybe_compressed}; + use super::*; use crate::context::DownloadBehavior; use crate::page_cache::PAGE_SZ; use crate::task_mgr::TaskKind; - use super::super::blob_io::tests::{random_array, write_maybe_compressed}; - use super::*; - fn validate_read(read: &VectoredRead, offset_range: &[(Key, Lsn, u64, BlobFlag)]) { const ALIGN: u64 = virtual_file::get_io_buffer_alignment() as u64; assert_eq!(read.start % ALIGN, 0); diff --git a/pageserver/src/utilization.rs b/pageserver/src/utilization.rs index 093a944777..29d1a31aaf 100644 --- a/pageserver/src/utilization.rs +++ b/pageserver/src/utilization.rs @@ -3,13 +3,15 @@ //! The metric is exposed via `GET /v1/utilization`. Refer and maintain it's openapi spec as the //! truth. 
-use anyhow::Context; use std::path::Path; + +use anyhow::Context; +use pageserver_api::models::PageserverUtilization; use utils::serde_percent::Percent; -use pageserver_api::models::PageserverUtilization; - -use crate::{config::PageServerConf, metrics::NODE_UTILIZATION_SCORE, tenant::mgr::TenantManager}; +use crate::config::PageServerConf; +use crate::metrics::NODE_UTILIZATION_SCORE; +use crate::tenant::mgr::TenantManager; pub(crate) fn regenerate( conf: &PageServerConf, diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index c966ad813f..b47aecf8a6 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -11,11 +11,13 @@ //! This is similar to PostgreSQL's virtual file descriptor facility in //! src/backend/storage/file/fd.c //! -use crate::context::RequestContext; -use crate::metrics::{StorageIoOperation, STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC}; +use std::fs::File; +use std::io::{Error, ErrorKind, Seek, SeekFrom}; +use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; +#[cfg(target_os = "linux")] +use std::os::unix::fs::OpenOptionsExt; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering}; -use crate::page_cache::{PageWriteGuard, PAGE_SZ}; -use crate::tenant::TENANTS_SEGMENT_NAME; use camino::{Utf8Path, Utf8PathBuf}; use once_cell::sync::OnceCell; use owned_buffers_io::aligned_buffer::buffer::AlignedBuffer; @@ -23,31 +25,30 @@ use owned_buffers_io::aligned_buffer::{AlignedBufferMut, AlignedSlice, ConstAlig use owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut}; use owned_buffers_io::io_buf_ext::FullSlice; use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT; +pub use pageserver_api::models::virtual_file as api; use pageserver_api::shard::TenantShardId; -use std::fs::File; -use std::io::{Error, ErrorKind, Seek, SeekFrom}; -#[cfg(target_os = "linux")] -use std::os::unix::fs::OpenOptionsExt; -use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice}; - 
-use std::os::fd::{AsRawFd, FromRawFd, IntoRawFd, OwnedFd, RawFd}; -use std::sync::atomic::{AtomicBool, AtomicU8, AtomicUsize, Ordering}; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use tokio::time::Instant; +use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice}; -pub use pageserver_api::models::virtual_file as api; +use crate::context::RequestContext; +use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC, StorageIoOperation}; +use crate::page_cache::{PAGE_SZ, PageWriteGuard}; +use crate::tenant::TENANTS_SEGMENT_NAME; pub(crate) mod io_engine; -pub use io_engine::feature_test as io_engine_feature_test; -pub use io_engine::io_engine_for_bench; -pub use io_engine::FeatureTestResult as IoEngineFeatureTestResult; +pub use io_engine::{ + FeatureTestResult as IoEngineFeatureTestResult, feature_test as io_engine_feature_test, + io_engine_for_bench, +}; mod metadata; mod open_options; -use self::owned_buffers_io::write::OwnedAsyncWriter; pub(crate) use api::IoMode; pub(crate) use io_engine::IoEngineKind; pub(crate) use metadata::Metadata; pub(crate) use open_options::*; +use self::owned_buffers_io::write::OwnedAsyncWriter; + pub(crate) mod owned_buffers_io { //! Abstractions for IO with owned buffers. //! 
@@ -1078,7 +1079,8 @@ where #[cfg(test)] mod test_read_exact_at_impl { - use std::{collections::VecDeque, sync::Arc}; + use std::collections::VecDeque; + use std::sync::Arc; use tokio_epoll_uring::{BoundedBuf, BoundedBufMut}; @@ -1424,19 +1426,19 @@ static SYNC_MODE: AtomicU8 = AtomicU8::new(SyncMode::Sync as u8); #[cfg(test)] mod tests { - use crate::context::DownloadBehavior; - use crate::task_mgr::TaskKind; - - use super::*; - use owned_buffers_io::io_buf_ext::IoBufExt; - use owned_buffers_io::slice::SliceMutExt; - use rand::seq::SliceRandom; - use rand::thread_rng; - use rand::Rng; use std::io::Write; use std::os::unix::fs::FileExt; use std::sync::Arc; + use owned_buffers_io::io_buf_ext::IoBufExt; + use owned_buffers_io::slice::SliceMutExt; + use rand::seq::SliceRandom; + use rand::{Rng, thread_rng}; + + use super::*; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; + enum MaybeVirtualFile { VirtualFile(VirtualFile), File(File), diff --git a/pageserver/src/virtual_file/io_engine.rs b/pageserver/src/virtual_file/io_engine.rs index ccde90ee1a..758dd6e377 100644 --- a/pageserver/src/virtual_file/io_engine.rs +++ b/pageserver/src/virtual_file/io_engine.rs @@ -80,7 +80,9 @@ pub(crate) fn get() -> IoEngine { Ok(v) => match v.parse::() { Ok(engine_kind) => engine_kind, Err(e) => { - panic!("invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}") + panic!( + "invalid VirtualFile io engine for env var {env_var_name}: {e:#}: {v:?}" + ) } }, Err(std::env::VarError::NotPresent) => { @@ -107,15 +109,12 @@ pub(crate) fn get() -> IoEngine { } } -use std::{ - os::unix::prelude::FileExt, - sync::atomic::{AtomicU8, Ordering}, -}; +use std::os::unix::prelude::FileExt; +use std::sync::atomic::{AtomicU8, Ordering}; -use super::{ - owned_buffers_io::{io_buf_ext::FullSlice, slice::SliceMutExt}, - FileGuard, Metadata, -}; +use super::owned_buffers_io::io_buf_ext::FullSlice; +use super::owned_buffers_io::slice::SliceMutExt; +use 
super::{FileGuard, Metadata}; #[cfg(target_os = "linux")] fn epoll_uring_error_to_std(e: tokio_epoll_uring::Error) -> std::io::Error { diff --git a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs index c67215492f..ad17405b64 100644 --- a/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs +++ b/pageserver/src/virtual_file/io_engine/tokio_epoll_uring_ext.rs @@ -5,18 +5,16 @@ //! on older kernels, such as some (but not all) older kernels in the Linux 5.10 series. //! See for more details. -use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use std::sync::Arc; - -use tokio_util::sync::CancellationToken; -use tracing::{error, info, info_span, warn, Instrument}; -use utils::backoff::{DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; +use std::sync::atomic::{AtomicU32, AtomicU64, Ordering}; use tokio_epoll_uring::{System, SystemHandle}; - -use crate::virtual_file::on_fatal_io_error; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, error, info, info_span, warn}; +use utils::backoff::{DEFAULT_BASE_BACKOFF_SECONDS, DEFAULT_MAX_BACKOFF_SECONDS}; use crate::metrics::tokio_epoll_uring::{self as metrics, THREAD_LOCAL_METRICS_STORAGE}; +use crate::virtual_file::on_fatal_io_error; #[derive(Clone)] struct ThreadLocalState(Arc); @@ -194,7 +192,7 @@ impl std::ops::Deref for Handle { fn deref(&self) -> &Self::Target { self.0 - .0 + .0 .cell .get() .expect("must be already initialized when using this") diff --git a/pageserver/src/virtual_file/open_options.rs b/pageserver/src/virtual_file/open_options.rs index 7f951270d1..e188b8649b 100644 --- a/pageserver/src/virtual_file/open_options.rs +++ b/pageserver/src/virtual_file/open_options.rs @@ -1,7 +1,9 @@ //! 
Enum-dispatch to the `OpenOptions` type of the respective [`super::IoEngineKind`]; +use std::os::fd::OwnedFd; +use std::path::Path; + use super::io_engine::IoEngine; -use std::{os::fd::OwnedFd, path::Path}; #[derive(Debug, Clone)] pub enum OpenOptions { diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs index a5c26cd746..090d2ece85 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer.rs @@ -1,9 +1,9 @@ -use std::{ - ops::{Deref, Range, RangeBounds}, - sync::Arc, -}; +use std::ops::{Deref, Range, RangeBounds}; +use std::sync::Arc; -use super::{alignment::Alignment, raw::RawAlignedBuffer, AlignedBufferMut, ConstAlign}; +use super::alignment::Alignment; +use super::raw::RawAlignedBuffer; +use super::{AlignedBufferMut, ConstAlign}; /// An shared, immutable aligned buffer type. #[derive(Clone, Debug)] diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs index d2f5e206bb..df5c911e50 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/buffer_mut.rs @@ -1,13 +1,9 @@ -use std::{ - mem::MaybeUninit, - ops::{Deref, DerefMut}, -}; +use std::mem::MaybeUninit; +use std::ops::{Deref, DerefMut}; -use super::{ - alignment::{Alignment, ConstAlign}, - buffer::AlignedBuffer, - raw::RawAlignedBuffer, -}; +use super::alignment::{Alignment, ConstAlign}; +use super::buffer::AlignedBuffer; +use super::raw::RawAlignedBuffer; /// A mutable aligned buffer type. #[derive(Debug)] @@ -75,7 +71,8 @@ impl AlignedBufferMut { /// Force the length of the buffer to `new_len`. 
#[inline] unsafe fn set_len(&mut self, new_len: usize) { - self.raw.set_len(new_len) + // SAFETY: the caller is unsafe + unsafe { self.raw.set_len(new_len) } } #[inline] @@ -222,8 +219,10 @@ unsafe impl bytes::BufMut for AlignedBufferMut { panic_advance(cnt, remaining); } - // Addition will not overflow since the sum is at most the capacity. - self.set_len(len + cnt); + // SAFETY: Addition will not overflow since the sum is at most the capacity. + unsafe { + self.set_len(len + cnt); + } } #[inline] @@ -275,7 +274,10 @@ unsafe impl tokio_epoll_uring::IoBufMut for AlignedBufferMut { unsafe fn set_init(&mut self, init_len: usize) { if self.len() < init_len { - self.set_len(init_len); + // SAFETY: caller function is unsafe + unsafe { + self.set_len(init_len); + } } } } diff --git a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs index 6c26dec0db..97a6c4049a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/aligned_buffer/raw.rs @@ -1,9 +1,7 @@ use core::slice; -use std::{ - alloc::{self, Layout}, - cmp, - mem::ManuallyDrop, -}; +use std::alloc::{self, Layout}; +use std::cmp; +use std::mem::ManuallyDrop; use super::alignment::{Alignment, ConstAlign}; diff --git a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs index 525f447b6d..4c671c2652 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/io_buf_ext.rs @@ -1,11 +1,12 @@ //! See [`FullSlice`]. 
-use crate::virtual_file::{IoBuffer, IoBufferMut}; -use bytes::{Bytes, BytesMut}; use std::ops::{Deref, Range}; + +use bytes::{Bytes, BytesMut}; use tokio_epoll_uring::{BoundedBuf, IoBuf, Slice}; use super::write::CheapCloneForRead; +use crate::virtual_file::{IoBuffer, IoBufferMut}; /// The true owned equivalent for Rust [`slice`]. Use this for the write path. /// diff --git a/pageserver/src/virtual_file/owned_buffers_io/slice.rs b/pageserver/src/virtual_file/owned_buffers_io/slice.rs index 6100593663..9f4a05dd57 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/slice.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/slice.rs @@ -1,7 +1,4 @@ -use tokio_epoll_uring::BoundedBuf; -use tokio_epoll_uring::BoundedBufMut; -use tokio_epoll_uring::IoBufMut; -use tokio_epoll_uring::Slice; +use tokio_epoll_uring::{BoundedBuf, BoundedBufMut, IoBufMut, Slice}; pub(crate) trait SliceMutExt { /// Get a `&mut[0..self.bytes_total()`] slice, for when you need to do borrow-based IO. @@ -35,10 +32,11 @@ where mod tests { use std::io::Read; - use super::*; use bytes::Buf; use tokio_epoll_uring::Slice; + use super::*; + #[test] fn test_slice_full_zeroed() { let make_fake_file = || bytes::BytesMut::from(&b"12345"[..]).reader(); diff --git a/pageserver/src/virtual_file/owned_buffers_io/write.rs b/pageserver/src/virtual_file/owned_buffers_io/write.rs index 7299d83703..861ca3aa2a 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs @@ -1,20 +1,14 @@ mod flush; use std::sync::Arc; +pub(crate) use flush::FlushControl; use flush::FlushHandle; use tokio_epoll_uring::IoBuf; -use crate::{ - context::RequestContext, - virtual_file::{IoBuffer, IoBufferMut}, -}; - -use super::{ - io_buf_aligned::IoBufAligned, - io_buf_ext::{FullSlice, IoBufExt}, -}; - -pub(crate) use flush::FlushControl; +use super::io_buf_aligned::IoBufAligned; +use super::io_buf_ext::{FullSlice, IoBufExt}; +use crate::context::RequestContext; 
+use crate::virtual_file::{IoBuffer, IoBufferMut}; pub(crate) trait CheapCloneForRead { /// Returns a cheap clone of the buffer. diff --git a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs index 9ce8b311bb..46309d4011 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs @@ -2,12 +2,10 @@ use std::sync::Arc; use utils::sync::duplex; -use crate::{ - context::RequestContext, - virtual_file::owned_buffers_io::{io_buf_aligned::IoBufAligned, io_buf_ext::FullSlice}, -}; - use super::{Buffer, CheapCloneForRead, OwnedAsyncWriter}; +use crate::context::RequestContext; +use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAligned; +use crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice; /// A handle to the flush task. pub struct FlushHandle { diff --git a/pageserver/src/walingest.rs b/pageserver/src/walingest.rs index 45c87353a7..18df065f76 100644 --- a/pageserver/src/walingest.rs +++ b/pageserver/src/walingest.rs @@ -22,39 +22,35 @@ //! bespoken Rust code. 
use std::collections::HashMap; -use std::sync::Arc; -use std::sync::OnceLock; -use std::time::Duration; -use std::time::Instant; -use std::time::SystemTime; +use std::sync::{Arc, OnceLock}; +use std::time::{Duration, Instant, SystemTime}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use bytes::{Buf, Bytes}; -use tracing::*; - -use crate::context::RequestContext; -use crate::metrics::WAL_INGEST; -use crate::pgdatadir_mapping::{DatadirModification, Version}; -use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; -use crate::tenant::PageReconstructError; -use crate::tenant::Timeline; -use crate::ZERO_PAGE; use pageserver_api::key::rel_block_to_key; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use pageserver_api::shard::ShardIdentity; -use postgres_ffi::fsm_logical_to_physical; -use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::{FSM_FORKNUM, INIT_FORKNUM, MAIN_FORKNUM, VISIBILITYMAP_FORKNUM}; use postgres_ffi::walrecord::*; -use postgres_ffi::TransactionId; -use postgres_ffi::{dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, TimestampTz}; +use postgres_ffi::{ + TimestampTz, TransactionId, dispatch_pgversion, enum_pgversion, enum_pgversion_dispatch, + fsm_logical_to_physical, pg_constants, +}; +use tracing::*; use utils::bin_ser::SerializeError; use utils::lsn::Lsn; use utils::rate_limit::RateLimit; use utils::{critical, failpoint_support}; use wal_decoder::models::*; +use crate::ZERO_PAGE; +use crate::context::RequestContext; +use crate::metrics::WAL_INGEST; +use crate::pgdatadir_mapping::{DatadirModification, Version}; +use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; +use crate::tenant::{PageReconstructError, Timeline}; + enum_pgversion! {CheckPoint, pgv::CheckPoint} impl CheckPoint { @@ -302,7 +298,9 @@ impl WalIngest { if xid > next_xid { // Wraparound occurred, must be from a prev epoch. 
if epoch == 0 { - bail!("apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}"); + bail!( + "apparent XID wraparound with prepared transaction XID {xid}, nextXid is {next_full_xid}" + ); } epoch -= 1; } @@ -796,9 +794,7 @@ impl WalIngest { // Remove twophase file. see RemoveTwoPhaseFile() in postgres code trace!( "Drop twophaseFile for xid {} parsed_xact.xid {} here at {}", - xl_xid, - parsed.xid, - lsn, + xl_xid, parsed.xid, lsn, ); let xid: u64 = if modification.tline.pg_version >= 17 { @@ -1130,16 +1126,14 @@ impl WalIngest { let xlog_checkpoint = pgv::CheckPoint::decode(&checkpoint_bytes)?; trace!( "xlog_checkpoint.oldestXid={}, checkpoint.oldestXid={}", - xlog_checkpoint.oldestXid, - cp.oldestXid + xlog_checkpoint.oldestXid, cp.oldestXid ); if (cp.oldestXid.wrapping_sub(xlog_checkpoint.oldestXid) as i32) < 0 { cp.oldestXid = xlog_checkpoint.oldestXid; } trace!( "xlog_checkpoint.oldestActiveXid={}, checkpoint.oldestActiveXid={}", - xlog_checkpoint.oldestActiveXid, - cp.oldestActiveXid + xlog_checkpoint.oldestActiveXid, cp.oldestActiveXid ); // A shutdown checkpoint has `oldestActiveXid == InvalidTransactionid`, @@ -1368,8 +1362,9 @@ impl WalIngest { // with zero pages. Logging is rate limited per pg version to // avoid skewing. 
if gap_blocks_filled > 0 { - use once_cell::sync::Lazy; use std::sync::Mutex; + + use once_cell::sync::Lazy; use utils::rate_limit::RateLimit; struct RateLimitPerPgVersion { @@ -1475,10 +1470,7 @@ impl WalIngest { if new_nblocks > old_nblocks { trace!( "extending SLRU {:?} seg {} from {} to {} blocks", - kind, - segno, - old_nblocks, - new_nblocks + kind, segno, old_nblocks, new_nblocks ); modification.put_slru_extend(kind, segno, new_nblocks)?; @@ -1517,13 +1509,13 @@ async fn get_relsize( #[allow(clippy::bool_assert_comparison)] #[cfg(test)] mod tests { - use super::*; - use crate::tenant::harness::*; - use crate::tenant::remote_timeline_client::{remote_initdb_archive_path, INITDB_PATH}; - use crate::tenant::storage_layer::IoConcurrency; use postgres_ffi::RELSEG_SIZE; + use super::*; use crate::DEFAULT_PG_VERSION; + use crate::tenant::harness::*; + use crate::tenant::remote_timeline_client::{INITDB_PATH, remote_initdb_archive_path}; + use crate::tenant::storage_layer::IoConcurrency; /// Arbitrary relation tag, for testing. const TESTREL_A: RelTag = RelTag { @@ -1606,10 +1598,12 @@ mod tests { .await?, false ); - assert!(tline - .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) - .await - .is_err()); + assert!( + tline + .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) + .await + .is_err() + ); assert_eq!( tline .get_rel_exists(TESTREL_A, Version::Lsn(Lsn(0x20)), &ctx) @@ -1997,10 +1991,12 @@ mod tests { .await?, false ); - assert!(tline - .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) - .await - .is_err()); + assert!( + tline + .get_rel_size(TESTREL_A, Version::Lsn(Lsn(0x10)), &ctx) + .await + .is_err() + ); assert_eq!( tline @@ -2230,9 +2226,10 @@ mod tests { /// without waiting for unrelated steps. 
#[tokio::test] async fn test_ingest_real_wal() { - use crate::tenant::harness::*; - use postgres_ffi::waldecoder::WalStreamDecoder; use postgres_ffi::WAL_SEGMENT_SIZE; + use postgres_ffi::waldecoder::WalStreamDecoder; + + use crate::tenant::harness::*; // Define test data path and constants. // diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 027a6eb7d7..22d8d83811 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -24,26 +24,27 @@ mod process; /// Code to apply [`NeonWalRecord`]s. pub(crate) mod apply_neon; -use crate::config::PageServerConf; -use crate::metrics::{ - WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, - WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, -}; +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; use bytes::{Bytes, BytesMut}; use pageserver_api::key::Key; use pageserver_api::models::{WalRedoManagerProcessStatus, WalRedoManagerStatus}; use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; -use std::future::Future; -use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; use tracing::*; use utils::lsn::Lsn; use utils::sync::gate::GateError; use utils::sync::heavier_once_cell; +use crate::config::PageServerConf; +use crate::metrics::{ + WAL_REDO_BYTES_HISTOGRAM, WAL_REDO_PROCESS_LAUNCH_DURATION_HISTOGRAM, + WAL_REDO_RECORDS_HISTOGRAM, WAL_REDO_TIME, +}; + /// The real implementation that uses a Postgres process to /// perform WAL replay. 
/// @@ -547,15 +548,18 @@ impl PostgresRedoManager { #[cfg(test)] mod tests { - use super::PostgresRedoManager; - use crate::config::PageServerConf; + use std::str::FromStr; + use bytes::Bytes; use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; use pageserver_api::shard::TenantShardId; - use std::str::FromStr; use tracing::Instrument; - use utils::{id::TenantId, lsn::Lsn}; + use utils::id::TenantId; + use utils::lsn::Lsn; + + use super::PostgresRedoManager; + use crate::config::PageServerConf; #[tokio::test] async fn test_ping() { diff --git a/pageserver/src/walredo/apply_neon.rs b/pageserver/src/walredo/apply_neon.rs index d62e325310..61ae1eb970 100644 --- a/pageserver/src/walredo/apply_neon.rs +++ b/pageserver/src/walredo/apply_neon.rs @@ -4,13 +4,12 @@ use bytes::BytesMut; use pageserver_api::key::Key; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::SlruKind; -use postgres_ffi::pg_constants; use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM; use postgres_ffi::v14::nonrelfile_utils::{ mx_offset_to_flags_bitshift, mx_offset_to_flags_offset, mx_offset_to_member_offset, transaction_id_set_status, }; -use postgres_ffi::BLCKSZ; +use postgres_ffi::{BLCKSZ, pg_constants}; use tracing::*; use utils::lsn::Lsn; diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index bf30b92ea5..5a9fc63e63 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -2,28 +2,28 @@ mod no_leak_child; /// The IPC protocol that pageserver and walredo process speak over their shared pipe. 
mod protocol; -use self::no_leak_child::NoLeakChild; -use crate::{ - config::PageServerConf, - metrics::{WalRedoKillCause, WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER}, - page_cache::PAGE_SZ, - span::debug_assert_current_span_has_tenant_id, -}; +use std::collections::VecDeque; +use std::process::{Command, Stdio}; +#[cfg(feature = "testing")] +use std::sync::atomic::AtomicUsize; +use std::time::Duration; + use anyhow::Context; use bytes::Bytes; use pageserver_api::record::NeonWalRecord; -use pageserver_api::{reltag::RelTag, shard::TenantShardId}; +use pageserver_api::reltag::RelTag; +use pageserver_api::shard::TenantShardId; use postgres_ffi::BLCKSZ; -#[cfg(feature = "testing")] -use std::sync::atomic::AtomicUsize; -use std::{ - collections::VecDeque, - process::{Command, Stdio}, - time::Duration, -}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tracing::{debug, error, instrument, Instrument}; -use utils::{lsn::Lsn, poison::Poison}; +use tracing::{Instrument, debug, error, instrument}; +use utils::lsn::Lsn; +use utils::poison::Poison; + +use self::no_leak_child::NoLeakChild; +use crate::config::PageServerConf; +use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WAL_REDO_RECORD_COUNTER, WalRedoKillCause}; +use crate::page_cache::PAGE_SZ; +use crate::span::debug_assert_current_span_has_tenant_id; pub struct WalRedoProcess { #[allow(dead_code)] diff --git a/pageserver/src/walredo/process/no_leak_child.rs b/pageserver/src/walredo/process/no_leak_child.rs index 1a0d7039df..9939fc4b36 100644 --- a/pageserver/src/walredo/process/no_leak_child.rs +++ b/pageserver/src/walredo/process/no_leak_child.rs @@ -1,19 +1,11 @@ -use tracing::instrument; -use tracing::{error, info}; - -use crate::metrics::WalRedoKillCause; -use crate::metrics::WAL_REDO_PROCESS_COUNTERS; - use std::io; -use std::process::Command; - -use std::ops::DerefMut; - -use std::ops::Deref; - -use std::process::Child; +use std::ops::{Deref, DerefMut}; +use std::process::{Child, Command}; use 
pageserver_api::shard::TenantShardId; +use tracing::{error, info, instrument}; + +use crate::metrics::{WAL_REDO_PROCESS_COUNTERS, WalRedoKillCause}; /// Wrapper type around `std::process::Child` which guarantees that the child /// will be killed and waited-for by this process before being dropped. diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index c86ac576ad..bb937ad56a 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "safekeeper" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/safekeeper/benches/receive_wal.rs b/safekeeper/benches/receive_wal.rs index 1c0ae66f01..122630d953 100644 --- a/safekeeper/benches/receive_wal.rs +++ b/safekeeper/benches/receive_wal.rs @@ -4,7 +4,7 @@ use std::io::Write as _; use bytes::BytesMut; use camino_tempfile::tempfile; -use criterion::{criterion_group, criterion_main, BatchSize, Bencher, Criterion}; +use criterion::{BatchSize, Bencher, Criterion, criterion_group, criterion_main}; use itertools::Itertools as _; use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; use pprof::criterion::{Output, PProfProfiler}; @@ -27,7 +27,7 @@ const GB: usize = 1024 * MB; static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; // Register benchmarks with Criterion. 
diff --git a/safekeeper/src/bin/safekeeper.rs b/safekeeper/src/bin/safekeeper.rs index 6cc53e0d23..10fc4a4b59 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -1,52 +1,41 @@ // // Main entry point for the safekeeper executable // -use anyhow::{bail, Context, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use clap::{ArgAction, Parser}; -use futures::future::BoxFuture; -use futures::stream::FuturesUnordered; -use futures::{FutureExt, StreamExt}; -use remote_storage::RemoteStorageConfig; -use sd_notify::NotifyState; -use tokio::runtime::Handle; -use tokio::signal::unix::{signal, SignalKind}; -use tokio::task::JoinError; -use utils::logging::SecretString; - -use std::env::{var, VarError}; +use std::env::{VarError, var}; use std::fs::{self, File}; use std::io::{ErrorKind, Write}; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; -use storage_broker::Uri; - -use tracing::*; -use utils::pid_file; +use anyhow::{Context, Result, bail}; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::{ArgAction, Parser}; +use futures::future::BoxFuture; +use futures::stream::FuturesUnordered; +use futures::{FutureExt, StreamExt}; use metrics::set_build_info_metric; +use remote_storage::RemoteStorageConfig; use safekeeper::defaults::{ DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, }; -use safekeeper::http; -use safekeeper::wal_service; -use safekeeper::GlobalTimelines; -use safekeeper::SafeKeeperConf; -use safekeeper::{broker, WAL_SERVICE_RUNTIME}; -use safekeeper::{control_file, BROKER_RUNTIME}; -use safekeeper::{wal_backup, HTTP_RUNTIME}; -use storage_broker::DEFAULT_ENDPOINT; -use utils::auth::{JwtAuth, Scope, SwappableJwtAuth}; -use utils::{ - id::NodeId, - logging::{self, LogFormat}, - project_build_tag, project_git_version, 
- sentry_init::init_sentry, - tcp_listener, +use safekeeper::{ + BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf, WAL_SERVICE_RUNTIME, broker, + control_file, http, wal_backup, wal_service, }; +use sd_notify::NotifyState; +use storage_broker::{DEFAULT_ENDPOINT, Uri}; +use tokio::runtime::Handle; +use tokio::signal::unix::{SignalKind, signal}; +use tokio::task::JoinError; +use tracing::*; +use utils::auth::{JwtAuth, Scope, SwappableJwtAuth}; +use utils::id::NodeId; +use utils::logging::{self, LogFormat, SecretString}; +use utils::sentry_init::init_sentry; +use utils::{pid_file, project_build_tag, project_git_version, tcp_listener}; #[global_allocator] static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; @@ -55,7 +44,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; const PID_FILE_NAME: &str = "safekeeper.pid"; diff --git a/safekeeper/src/broker.rs b/safekeeper/src/broker.rs index 4b091e2c29..de6e275124 100644 --- a/safekeeper/src/broker.rs +++ b/safekeeper/src/broker.rs @@ -1,39 +1,25 @@ //! Communication with the broker, providing safekeeper peers and pageserver coordination. 
-use anyhow::anyhow; -use anyhow::bail; -use anyhow::Context; - -use anyhow::Error; -use anyhow::Result; - -use storage_broker::parse_proto_ttid; - -use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey; -use storage_broker::proto::FilterTenantTimelineId; -use storage_broker::proto::MessageType; -use storage_broker::proto::SafekeeperDiscoveryResponse; -use storage_broker::proto::SubscribeByFilterRequest; -use storage_broker::proto::SubscribeSafekeeperInfoRequest; -use storage_broker::proto::TypeSubscription; -use storage_broker::proto::TypedMessage; -use storage_broker::Request; - -use std::sync::atomic::AtomicU64; use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; -use std::time::UNIX_EPOCH; +use std::sync::atomic::AtomicU64; +use std::time::{Duration, Instant, UNIX_EPOCH}; + +use anyhow::{Context, Error, Result, anyhow, bail}; +use storage_broker::proto::subscribe_safekeeper_info_request::SubscriptionKey as ProtoSubscriptionKey; +use storage_broker::proto::{ + FilterTenantTimelineId, MessageType, SafekeeperDiscoveryResponse, SubscribeByFilterRequest, + SubscribeSafekeeperInfoRequest, TypeSubscription, TypedMessage, +}; +use storage_broker::{Request, parse_proto_ttid}; use tokio::task::JoinHandle; use tokio::time::sleep; use tracing::*; -use crate::metrics::BROKER_ITERATION_TIMELINES; -use crate::metrics::BROKER_PULLED_UPDATES; -use crate::metrics::BROKER_PUSHED_UPDATES; -use crate::metrics::BROKER_PUSH_ALL_UPDATES_SECONDS; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; +use crate::metrics::{ + BROKER_ITERATION_TIMELINES, BROKER_PULLED_UPDATES, BROKER_PUSH_ALL_UPDATES_SECONDS, + BROKER_PUSHED_UPDATES, +}; +use crate::{GlobalTimelines, SafeKeeperConf}; const RETRY_INTERVAL_MSEC: u64 = 1000; const PUSH_INTERVAL_MSEC: u64 = 1000; diff --git a/safekeeper/src/control_file.rs b/safekeeper/src/control_file.rs index 35aebfd8ad..1bf3e4cac1 100644 --- a/safekeeper/src/control_file.rs +++ 
b/safekeeper/src/control_file.rs @@ -1,24 +1,23 @@ //! Control file serialization, deserialization and persistence. -use anyhow::{bail, ensure, Context, Result}; -use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; -use camino::{Utf8Path, Utf8PathBuf}; -use safekeeper_api::membership::INVALID_GENERATION; -use tokio::fs::File; -use tokio::io::AsyncWriteExt; -use utils::crashsafe::durable_rename; - use std::future::Future; use std::io::Read; use std::ops::Deref; use std::path::Path; use std::time::Instant; -use crate::control_file_upgrade::downgrade_v10_to_v9; -use crate::control_file_upgrade::upgrade_control_file; +use anyhow::{Context, Result, bail, ensure}; +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use camino::{Utf8Path, Utf8PathBuf}; +use safekeeper_api::membership::INVALID_GENERATION; +use tokio::fs::File; +use tokio::io::AsyncWriteExt; +use utils::bin_ser::LeSer; +use utils::crashsafe::durable_rename; + +use crate::control_file_upgrade::{downgrade_v10_to_v9, upgrade_control_file}; use crate::metrics::PERSIST_CONTROL_FILE_SECONDS; use crate::state::{EvictionState, TimelinePersistentState}; -use utils::bin_ser::LeSer; pub const SK_MAGIC: u32 = 0xcafeceefu32; pub const SK_FORMAT_VERSION: u32 = 10; @@ -234,11 +233,12 @@ impl Storage for FileStorage { #[cfg(test)] mod test { - use super::*; use safekeeper_api::membership::{Configuration, MemberSet, SafekeeperGeneration}; use tokio::fs; use utils::lsn::Lsn; + use super::*; + const NO_SYNC: bool = true; #[tokio::test] diff --git a/safekeeper/src/control_file_upgrade.rs b/safekeeper/src/control_file_upgrade.rs index 904e79f976..1ad9e62f9b 100644 --- a/safekeeper/src/control_file_upgrade.rs +++ b/safekeeper/src/control_file_upgrade.rs @@ -1,24 +1,19 @@ //! 
Code to deal with safekeeper control file upgrades use std::vec; -use crate::{ - safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn}, - state::{EvictionState, TimelinePersistentState}, - wal_backup_partial, -}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use pq_proto::SystemId; -use safekeeper_api::{ - membership::{Configuration, INVALID_GENERATION}, - ServerInfo, Term, -}; +use safekeeper_api::membership::{Configuration, INVALID_GENERATION}; +use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tracing::*; -use utils::{ - bin_ser::LeSer, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::bin_ser::LeSer; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; + +use crate::safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn}; +use crate::state::{EvictionState, TimelinePersistentState}; +use crate::wal_backup_partial; /// Persistent consensus state of the acceptor. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] @@ -552,11 +547,11 @@ pub fn downgrade_v10_to_v9(state: &TimelinePersistentState) -> TimelinePersisten mod tests { use std::str::FromStr; - use utils::{id::NodeId, Hex}; - - use crate::control_file_upgrade::PersistedPeerInfo; + use utils::Hex; + use utils::id::NodeId; use super::*; + use crate::control_file_upgrade::PersistedPeerInfo; #[test] fn roundtrip_v1() { diff --git a/safekeeper/src/copy_timeline.rs b/safekeeper/src/copy_timeline.rs index 10a761e1f5..11daff22cb 100644 --- a/safekeeper/src/copy_timeline.rs +++ b/safekeeper/src/copy_timeline.rs @@ -1,24 +1,22 @@ -use anyhow::{bail, Result}; +use std::sync::Arc; + +use anyhow::{Result, bail}; use camino::Utf8PathBuf; use postgres_ffi::{MAX_SEND_SIZE, WAL_SEGMENT_SIZE}; use safekeeper_api::membership::Configuration; -use std::sync::Arc; -use tokio::{ - fs::OpenOptions, - io::{AsyncSeekExt, AsyncWriteExt}, -}; +use tokio::fs::OpenOptions; +use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tracing::{info, warn}; 
-use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; -use crate::{ - control_file::FileStorage, - state::TimelinePersistentState, - timeline::{TimelineError, WalResidentTimeline}, - timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, - wal_backup::copy_s3_segments, - wal_storage::{wal_file_paths, WalReader}, - GlobalTimelines, -}; +use crate::GlobalTimelines; +use crate::control_file::FileStorage; +use crate::state::TimelinePersistentState; +use crate::timeline::{TimelineError, WalResidentTimeline}; +use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}; +use crate::wal_backup::copy_s3_segments; +use crate::wal_storage::{WalReader, wal_file_paths}; // we don't want to have more than 10 segments on disk after copy, because they take space const MAX_BACKUP_LAG: u64 = 10 * WAL_SEGMENT_SIZE as u64; diff --git a/safekeeper/src/debug_dump.rs b/safekeeper/src/debug_dump.rs index 19362a0992..68a38e1498 100644 --- a/safekeeper/src/debug_dump.rs +++ b/safekeeper/src/debug_dump.rs @@ -2,37 +2,25 @@ use std::fs; use std::fs::DirEntry; -use std::io::BufReader; -use std::io::Read; +use std::io::{BufReader, Read}; use std::path::PathBuf; use std::sync::Arc; -use anyhow::bail; -use anyhow::Result; -use camino::Utf8Path; -use camino::Utf8PathBuf; +use anyhow::{Result, bail}; +use camino::{Utf8Path, Utf8PathBuf}; use chrono::{DateTime, Utc}; -use postgres_ffi::XLogSegNo; -use postgres_ffi::MAX_SEND_SIZE; -use safekeeper_api::models::WalSenderState; -use serde::Deserialize; -use serde::Serialize; - use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName}; +use postgres_ffi::{MAX_SEND_SIZE, XLogSegNo}; +use safekeeper_api::models::WalSenderState; +use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use utils::id::NodeId; -use utils::id::TenantTimelineId; -use utils::id::{TenantId, TimelineId}; +use utils::id::{NodeId, TenantId, TenantTimelineId, 
TimelineId}; use utils::lsn::Lsn; use crate::safekeeper::TermHistory; -use crate::state::TimelineMemState; -use crate::state::TimelinePersistentState; -use crate::timeline::get_timeline_dir; -use crate::timeline::WalResidentTimeline; -use crate::timeline_manager; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; +use crate::state::{TimelineMemState, TimelinePersistentState}; +use crate::timeline::{WalResidentTimeline, get_timeline_dir}; +use crate::{GlobalTimelines, SafeKeeperConf, timeline_manager}; /// Various filters that influence the resulting JSON output. #[derive(Debug, Serialize, Deserialize, Clone)] diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index e77eeb4130..dd7008c87d 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -1,35 +1,32 @@ //! Part of Safekeeper pretending to be Postgres, i.e. handling Postgres //! protocol commands. +use std::future::Future; +use std::str::{self, FromStr}; +use std::sync::Arc; + use anyhow::Context; use pageserver_api::models::ShardParameters; use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; -use safekeeper_api::models::ConnectionId; +use postgres_backend::{PostgresBackend, QueryError}; +use postgres_ffi::PG_TLI; +use pq_proto::{BeMessage, FeStartupPacket, INT4_OID, RowDescriptor, TEXT_OID}; +use regex::Regex; use safekeeper_api::Term; -use std::future::Future; -use std::str::{self, FromStr}; -use std::sync::Arc; +use safekeeper_api::models::ConnectionId; use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::{debug, info, info_span, Instrument}; +use tracing::{Instrument, debug, info, info_span}; +use utils::auth::{Claims, JwtAuth, Scope}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use utils::postgres_client::PostgresClientProtocol; use utils::shard::{ShardCount, ShardNumber}; use crate::auth::check_permission; -use crate::json_ctrl::{handle_json_ctrl, AppendLogicalMessage}; - -use crate::metrics::{TrafficMetrics, 
PG_QUERIES_GAUGE}; +use crate::json_ctrl::{AppendLogicalMessage, handle_json_ctrl}; +use crate::metrics::{PG_QUERIES_GAUGE, TrafficMetrics}; use crate::timeline::TimelineError; use crate::{GlobalTimelines, SafeKeeperConf}; -use postgres_backend::PostgresBackend; -use postgres_backend::QueryError; -use postgres_ffi::PG_TLI; -use pq_proto::{BeMessage, FeStartupPacket, RowDescriptor, INT4_OID, TEXT_OID}; -use regex::Regex; -use utils::auth::{Claims, JwtAuth, Scope}; -use utils::{ - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; /// Safekeeper handler of postgres commands pub struct SafekeeperPostgresHandler { diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs index 6e160b7a5e..f162985ef7 100644 --- a/safekeeper/src/http/mod.rs +++ b/safekeeper/src/http/mod.rs @@ -1,9 +1,9 @@ pub mod routes; -pub use routes::make_router; - -pub use safekeeper_api::models; use std::sync::Arc; +pub use routes::make_router; +pub use safekeeper_api::models; + use crate::{GlobalTimelines, SafeKeeperConf}; pub async fn task_main( diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index cd2ac5f44c..3b3bc71ac4 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -1,51 +1,41 @@ -use http_utils::failpoints::failpoints_handler; -use hyper::{Body, Request, Response, StatusCode}; -use safekeeper_api::models; -use safekeeper_api::models::AcceptorStateStatus; -use safekeeper_api::models::PullTimelineRequest; -use safekeeper_api::models::SafekeeperStatus; -use safekeeper_api::models::TermSwitchApiEntry; -use safekeeper_api::models::TimelineStatus; -use safekeeper_api::ServerInfo; use std::collections::HashMap; use std::fmt; use std::io::Write as _; use std::str::FromStr; use std::sync::Arc; -use storage_broker::proto::SafekeeperTimelineInfo; -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; + +use http_utils::endpoint::{ + self, ChannelWriter, auth_middleware, check_permission_with, 
profile_cpu_handler, + profile_heap_handler, prometheus_metrics_handler, request_span, +}; +use http_utils::error::ApiError; +use http_utils::failpoints::failpoints_handler; +use http_utils::json::{json_request, json_response}; +use http_utils::request::{ensure_no_body, parse_query_param, parse_request_param}; +use http_utils::{RequestExt, RouterBuilder}; +use hyper::{Body, Request, Response, StatusCode}; +use postgres_ffi::WAL_SEGMENT_SIZE; +use safekeeper_api::models::{ + AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry, + TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest, +}; +use safekeeper_api::{ServerInfo, models}; +use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; use tokio::sync::mpsc; use tokio::task; use tokio_stream::wrappers::ReceiverStream; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; - -use http_utils::endpoint::{ - profile_cpu_handler, profile_heap_handler, prometheus_metrics_handler, request_span, -}; -use http_utils::{ - endpoint::{self, auth_middleware, check_permission_with, ChannelWriter}, - error::ApiError, - json::{json_request, json_response}, - request::{ensure_no_body, parse_query_param, parse_request_param}, - RequestExt, RouterBuilder, -}; - -use postgres_ffi::WAL_SEGMENT_SIZE; -use safekeeper_api::models::{SkTimelineInfo, TimelineCopyRequest}; -use safekeeper_api::models::{TimelineCreateRequest, TimelineTermBumpRequest}; -use utils::{ - auth::SwappableJwtAuth, - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use tracing::{Instrument, info_span}; +use utils::auth::SwappableJwtAuth; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use crate::debug_dump::TimelineDigestRequest; use crate::safekeeper::TermLsn; use crate::timelines_global_map::TimelineDeleteForceResult; -use crate::GlobalTimelines; -use crate::SafeKeeperConf; -use 
crate::{copy_timeline, debug_dump, patch_control_file, pull_timeline}; +use crate::{ + GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, patch_control_file, pull_timeline, +}; /// Healthcheck handler. async fn status_handler(request: Request) -> Result, ApiError> { diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs index 8d7c1109ad..793ea9c3e9 100644 --- a/safekeeper/src/json_ctrl.rs +++ b/safekeeper/src/json_ctrl.rs @@ -7,26 +7,23 @@ //! use anyhow::Context; -use postgres_backend::QueryError; +use postgres_backend::{PostgresBackend, QueryError}; +use postgres_ffi::{WAL_SEGMENT_SIZE, encode_logical_message}; +use pq_proto::{BeMessage, RowDescriptor, TEXT_OID}; use safekeeper_api::membership::{Configuration, INVALID_GENERATION}; use safekeeper_api::{ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::*; +use utils::lsn::Lsn; use crate::handler::SafekeeperPostgresHandler; -use crate::safekeeper::{AcceptorProposerMessage, AppendResponse}; use crate::safekeeper::{ - AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, ProposerElected, + AcceptorProposerMessage, AppendRequest, AppendRequestHeader, AppendResponse, + ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, }; -use crate::safekeeper::{TermHistory, TermLsn}; use crate::state::TimelinePersistentState; use crate::timeline::WalResidentTimeline; -use postgres_backend::PostgresBackend; -use postgres_ffi::encode_logical_message; -use postgres_ffi::WAL_SEGMENT_SIZE; -use pq_proto::{BeMessage, RowDescriptor, TEXT_OID}; -use utils::lsn::Lsn; #[derive(Serialize, Deserialize, Debug)] pub struct AppendLogicalMessage { diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index e0090c638a..c52b097066 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -2,15 +2,16 @@ extern crate hyper0 as hyper; +use std::time::Duration; + use camino::Utf8PathBuf; use once_cell::sync::Lazy; use 
remote_storage::RemoteStorageConfig; -use tokio::runtime::Runtime; - -use std::time::Duration; use storage_broker::Uri; - -use utils::{auth::SwappableJwtAuth, id::NodeId, logging::SecretString}; +use tokio::runtime::Runtime; +use utils::auth::SwappableJwtAuth; +use utils::id::NodeId; +use utils::logging::SecretString; mod auth; pub mod broker; @@ -48,6 +49,7 @@ pub mod test_utils; mod timelines_global_map; use std::sync::Arc; + pub use timelines_global_map::GlobalTimelines; use utils::auth::JwtAuth; diff --git a/safekeeper/src/metrics.rs b/safekeeper/src/metrics.rs index 3ea9e3d674..cb21a5f6d2 100644 --- a/safekeeper/src/metrics.rs +++ b/safekeeper/src/metrics.rs @@ -1,30 +1,28 @@ //! Global safekeeper mertics and per-timeline safekeeper metrics. -use std::{ - sync::{Arc, RwLock}, - time::{Instant, SystemTime}, -}; +use std::sync::{Arc, RwLock}; +use std::time::{Instant, SystemTime}; use anyhow::Result; use futures::Future; +use metrics::core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}; +use metrics::proto::MetricFamily; use metrics::{ - core::{AtomicU64, Collector, Desc, GenericCounter, GenericGaugeVec, Opts}, - pow2_buckets, - proto::MetricFamily, + DISK_FSYNC_SECONDS_BUCKETS, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, + IntCounterPair, IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, pow2_buckets, register_histogram, register_histogram_vec, register_int_counter, register_int_counter_pair, register_int_counter_pair_vec, register_int_counter_vec, register_int_gauge, - register_int_gauge_vec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair, - IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, DISK_FSYNC_SECONDS_BUCKETS, + register_int_gauge_vec, }; use once_cell::sync::Lazy; use postgres_ffi::XLogSegNo; -use utils::{id::TenantTimelineId, lsn::Lsn, pageserver_feedback::PageserverFeedback}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; 
-use crate::{ - receive_wal::MSG_QUEUE_SIZE, - state::{TimelineMemState, TimelinePersistentState}, - GlobalTimelines, -}; +use crate::GlobalTimelines; +use crate::receive_wal::MSG_QUEUE_SIZE; +use crate::state::{TimelineMemState, TimelinePersistentState}; // Global metrics across all timelines. pub static WRITE_WAL_BYTES: Lazy = Lazy::new(|| { diff --git a/safekeeper/src/patch_control_file.rs b/safekeeper/src/patch_control_file.rs index 2136d1b5f7..efdbd9b3d7 100644 --- a/safekeeper/src/patch_control_file.rs +++ b/safekeeper/src/patch_control_file.rs @@ -4,7 +4,8 @@ use serde::{Deserialize, Serialize}; use serde_json::Value; use tracing::info; -use crate::{state::TimelinePersistentState, timeline::Timeline}; +use crate::state::TimelinePersistentState; +use crate::timeline::Timeline; #[derive(Deserialize, Debug, Clone)] pub struct Request { diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 4827b73074..fc58b8509a 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -1,46 +1,38 @@ -use anyhow::{anyhow, bail, Context, Result}; +use std::cmp::min; +use std::io::{self, ErrorKind}; +use std::sync::Arc; + +use anyhow::{Context, Result, anyhow, bail}; use bytes::Bytes; use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use futures::{SinkExt, StreamExt, TryStreamExt}; -use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; -use safekeeper_api::{ - models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus}, - Term, -}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; +use safekeeper_api::Term; +use safekeeper_api::models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus}; use safekeeper_client::mgmt_api; use safekeeper_client::mgmt_api::Client; use serde::Deserialize; -use std::{ - cmp::min, - io::{self, ErrorKind}, - sync::Arc, -}; -use tokio::{fs::OpenOptions, io::AsyncWrite, sync::mpsc, task}; +use tokio::fs::OpenOptions; +use tokio::io::AsyncWrite; +use tokio::sync::mpsc; +use 
tokio::task; use tokio_tar::{Archive, Builder, Header}; -use tokio_util::{ - io::{CopyToBytes, SinkWriter}, - sync::PollSender, -}; +use tokio_util::io::{CopyToBytes, SinkWriter}; +use tokio_util::sync::PollSender; use tracing::{error, info, instrument}; +use utils::crashsafe::fsync_async_opt; +use utils::id::{NodeId, TenantTimelineId}; +use utils::logging::SecretString; +use utils::lsn::Lsn; +use utils::pausable_failpoint; -use crate::{ - control_file::CONTROL_FILE_NAME, - debug_dump, - state::{EvictionState, TimelinePersistentState}, - timeline::{Timeline, WalResidentTimeline}, - timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}, - wal_backup, - wal_storage::open_wal_file, - GlobalTimelines, -}; -use utils::{ - crashsafe::fsync_async_opt, - id::{NodeId, TenantTimelineId}, - logging::SecretString, - lsn::Lsn, - pausable_failpoint, -}; +use crate::control_file::CONTROL_FILE_NAME; +use crate::state::{EvictionState, TimelinePersistentState}; +use crate::timeline::{Timeline, WalResidentTimeline}; +use crate::timelines_global_map::{create_temp_timeline_dir, validate_temp_timeline}; +use crate::wal_storage::open_wal_file; +use crate::{GlobalTimelines, debug_dump, wal_backup}; /// Stream tar archive of timeline to tx. #[instrument(name = "snapshot", skip_all, fields(ttid = %tli.ttid))] @@ -374,8 +366,13 @@ impl WalResidentTimeline { // change, but as long as older history is strictly part of new that's // fine), but there is no need to do it. 
if bctx.term != term || bctx.last_log_term != last_log_term { - bail!("term(s) changed during snapshot: were term={}, last_log_term={}, now term={}, last_log_term={}", - bctx.term, bctx.last_log_term, term, last_log_term); + bail!( + "term(s) changed during snapshot: were term={}, last_log_term={}, now term={}, last_log_term={}", + bctx.term, + bctx.last_log_term, + term, + last_log_term + ); } Ok(()) } diff --git a/safekeeper/src/receive_wal.rs b/safekeeper/src/receive_wal.rs index a94e6930e1..7967acde3f 100644 --- a/safekeeper/src/receive_wal.rs +++ b/safekeeper/src/receive_wal.rs @@ -2,35 +2,21 @@ //! Gets messages from the network, passes them down to consensus module and //! sends replies back. -use crate::handler::SafekeeperPostgresHandler; -use crate::metrics::{ - WAL_RECEIVERS, WAL_RECEIVER_QUEUE_DEPTH, WAL_RECEIVER_QUEUE_DEPTH_TOTAL, - WAL_RECEIVER_QUEUE_SIZE_TOTAL, -}; -use crate::safekeeper::AcceptorProposerMessage; -use crate::safekeeper::ProposerAcceptorMessage; -use crate::timeline::WalResidentTimeline; -use crate::GlobalTimelines; -use anyhow::{anyhow, Context}; -use bytes::BytesMut; -use parking_lot::MappedMutexGuard; -use parking_lot::Mutex; -use parking_lot::MutexGuard; -use postgres_backend::CopyStreamHandlerEnd; -use postgres_backend::PostgresBackend; -use postgres_backend::PostgresBackendReader; -use postgres_backend::QueryError; -use pq_proto::BeMessage; -use safekeeper_api::membership::Configuration; -use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus}; -use safekeeper_api::ServerInfo; use std::future; use std::net::SocketAddr; use std::sync::Arc; -use tokio::io::AsyncRead; -use tokio::io::AsyncWrite; + +use anyhow::{Context, anyhow}; +use bytes::BytesMut; +use parking_lot::{MappedMutexGuard, Mutex, MutexGuard}; +use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend, PostgresBackendReader, QueryError}; +use pq_proto::BeMessage; +use safekeeper_api::ServerInfo; +use safekeeper_api::membership::Configuration; 
+use safekeeper_api::models::{ConnectionId, WalReceiverState, WalReceiverStatus}; +use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc::error::SendTimeoutError; -use tokio::sync::mpsc::{channel, Receiver, Sender}; +use tokio::sync::mpsc::{Receiver, Sender, channel}; use tokio::task; use tokio::task::JoinHandle; use tokio::time::{Duration, Instant, MissedTickBehavior}; @@ -39,6 +25,15 @@ use utils::id::TenantTimelineId; use utils::lsn::Lsn; use utils::pageserver_feedback::PageserverFeedback; +use crate::GlobalTimelines; +use crate::handler::SafekeeperPostgresHandler; +use crate::metrics::{ + WAL_RECEIVER_QUEUE_DEPTH, WAL_RECEIVER_QUEUE_DEPTH_TOTAL, WAL_RECEIVER_QUEUE_SIZE_TOTAL, + WAL_RECEIVERS, +}; +use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage}; +use crate::timeline::WalResidentTimeline; + const DEFAULT_FEEDBACK_CAPACITY: usize = 8; /// Registry of WalReceivers (compute connections). Timeline holds it (wrapped @@ -371,7 +366,7 @@ impl NetworkReader<'_, IO> { _ => { return Err(CopyStreamHandlerEnd::Other(anyhow::anyhow!( "unexpected message {next_msg:?} instead of greeting" - ))) + ))); } }; Ok((tli, next_msg)) diff --git a/safekeeper/src/recovery.rs b/safekeeper/src/recovery.rs index 3e9080ebbe..c2760792b8 100644 --- a/safekeeper/src/recovery.rs +++ b/safekeeper/src/recovery.rs @@ -1,40 +1,36 @@ //! This module implements pulling WAL from peer safekeepers if compute can't //! provide it, i.e. safekeeper lags too much. 
+use std::fmt; +use std::pin::pin; use std::time::SystemTime; -use std::{fmt, pin::pin}; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use futures::StreamExt; use postgres_protocol::message::backend::ReplicationMessage; +use safekeeper_api::Term; use safekeeper_api::membership::INVALID_GENERATION; use safekeeper_api::models::{PeerInfo, TimelineStatus}; -use safekeeper_api::Term; -use tokio::sync::mpsc::{channel, Receiver, Sender}; -use tokio::time::timeout; -use tokio::{ - select, - time::sleep, - time::{self, Duration}, -}; +use tokio::select; +use tokio::sync::mpsc::{Receiver, Sender, channel}; +use tokio::time::{self, Duration, sleep, timeout}; use tokio_postgres::replication::ReplicationStream; use tokio_postgres::types::PgLsn; use tracing::*; -use utils::postgres_client::{ConnectionConfigArgs, PostgresClientProtocol}; -use utils::{id::NodeId, lsn::Lsn, postgres_client::wal_stream_connection_config}; - -use crate::receive_wal::{WalAcceptor, REPLY_QUEUE_SIZE}; -use crate::safekeeper::{AppendRequest, AppendRequestHeader}; -use crate::timeline::WalResidentTimeline; -use crate::{ - receive_wal::MSG_QUEUE_SIZE, - safekeeper::{ - AcceptorProposerMessage, ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, - VoteRequest, - }, - SafeKeeperConf, +use utils::id::NodeId; +use utils::lsn::Lsn; +use utils::postgres_client::{ + ConnectionConfigArgs, PostgresClientProtocol, wal_stream_connection_config, }; +use crate::SafeKeeperConf; +use crate::receive_wal::{MSG_QUEUE_SIZE, REPLY_QUEUE_SIZE, WalAcceptor}; +use crate::safekeeper::{ + AcceptorProposerMessage, AppendRequest, AppendRequestHeader, ProposerAcceptorMessage, + ProposerElected, TermHistory, TermLsn, VoteRequest, +}; +use crate::timeline::WalResidentTimeline; + /// Entrypoint for per timeline task which always runs, checking whether /// recovery for this safekeeper is needed and starting it if so. 
#[instrument(name = "recovery", skip_all, fields(ttid = %tli.ttid))] @@ -355,7 +351,9 @@ async fn recovery_stream( { Ok(client_and_conn) => client_and_conn?, Err(_elapsed) => { - bail!("timed out while waiting {connect_timeout:?} for connection to peer safekeeper to open"); + bail!( + "timed out while waiting {connect_timeout:?} for connection to peer safekeeper to open" + ); } }; trace!("connected to {:?}", donor); diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index f429cafed2..0edac04b97 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -1,39 +1,31 @@ //! Acceptor part of proposer-acceptor consensus algorithm. -use anyhow::{bail, Context, Result}; -use byteorder::{LittleEndian, ReadBytesExt}; -use bytes::{Buf, BufMut, Bytes, BytesMut}; - -use postgres_ffi::{TimeLineID, MAX_SEND_SIZE}; -use safekeeper_api::membership; -use safekeeper_api::membership::MemberSet; -use safekeeper_api::membership::SafekeeperGeneration as Generation; -use safekeeper_api::membership::SafekeeperId; -use safekeeper_api::membership::INVALID_GENERATION; -use safekeeper_api::models::HotStandbyFeedback; -use safekeeper_api::Term; -use serde::{Deserialize, Serialize}; -use std::cmp::max; -use std::cmp::min; +use std::cmp::{max, min}; use std::fmt; use std::io::Read; use std::str::FromStr; -use storage_broker::proto::SafekeeperTimelineInfo; -use tracing::*; - -use crate::control_file; -use crate::metrics::MISC_OPERATION_SECONDS; - -use crate::state::TimelineState; -use crate::wal_storage; +use anyhow::{Context, Result, bail}; +use byteorder::{LittleEndian, ReadBytesExt}; +use bytes::{Buf, BufMut, Bytes, BytesMut}; +use postgres_ffi::{MAX_SEND_SIZE, TimeLineID}; use pq_proto::SystemId; -use utils::pageserver_feedback::PageserverFeedback; -use utils::{ - bin_ser::LeSer, - id::{NodeId, TenantId, TimelineId}, - lsn::Lsn, +use safekeeper_api::membership::{ + INVALID_GENERATION, MemberSet, SafekeeperGeneration as Generation, SafekeeperId, }; 
+use safekeeper_api::models::HotStandbyFeedback; +use safekeeper_api::{Term, membership}; +use serde::{Deserialize, Serialize}; +use storage_broker::proto::SafekeeperTimelineInfo; +use tracing::*; +use utils::bin_ser::LeSer; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; + +use crate::metrics::MISC_OPERATION_SECONDS; +use crate::state::TimelineState; +use crate::{control_file, wal_storage}; pub const SK_PROTO_VERSION_2: u32 = 2; pub const SK_PROTO_VERSION_3: u32 = 3; @@ -1137,9 +1129,14 @@ where // and walproposer recalculates the streaming point. OTOH repeating // error indicates a serious bug. if last_common_point.lsn != msg.start_streaming_at { - bail!("refusing ProposerElected with unexpected truncation point: lcp={:?} start_streaming_at={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", - last_common_point, msg.start_streaming_at, - self.state.acceptor_state.term, sk_th, self.flush_lsn(), msg.term_history, + bail!( + "refusing ProposerElected with unexpected truncation point: lcp={:?} start_streaming_at={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", + last_common_point, + msg.start_streaming_at, + self.state.acceptor_state.term, + sk_th, + self.flush_lsn(), + msg.term_history, ); } @@ -1147,8 +1144,12 @@ where assert!( msg.start_streaming_at >= self.state.inmem.commit_lsn, "attempt to truncate committed data: start_streaming_at={}, commit_lsn={}, term={}, sk_th={:?} flush_lsn={}, wp_th={:?}", - msg.start_streaming_at, self.state.inmem.commit_lsn, - self.state.acceptor_state.term, sk_th, self.flush_lsn(), msg.term_history, + msg.start_streaming_at, + self.state.inmem.commit_lsn, + self.state.acceptor_state.term, + sk_th, + self.flush_lsn(), + msg.term_history, ); // Before first WAL write initialize its segment. 
It makes first segment @@ -1373,21 +1374,19 @@ where #[cfg(test)] mod tests { - use futures::future::BoxFuture; + use std::ops::Deref; + use std::str::FromStr; + use std::time::{Instant, UNIX_EPOCH}; - use postgres_ffi::{XLogSegNo, WAL_SEGMENT_SIZE}; - use safekeeper_api::{ - membership::{Configuration, MemberSet, SafekeeperGeneration, SafekeeperId}, - ServerInfo, + use futures::future::BoxFuture; + use postgres_ffi::{WAL_SEGMENT_SIZE, XLogSegNo}; + use safekeeper_api::ServerInfo; + use safekeeper_api::membership::{ + Configuration, MemberSet, SafekeeperGeneration, SafekeeperId, }; use super::*; use crate::state::{EvictionState, TimelinePersistentState}; - use std::{ - ops::Deref, - str::FromStr, - time::{Instant, UNIX_EPOCH}, - }; // fake storage for tests struct InMemoryState { diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 0662bb9518..be0c849a5f 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -3,23 +3,22 @@ use std::fmt::Display; use std::sync::Arc; use std::time::Duration; -use anyhow::{anyhow, Context}; -use futures::future::Either; +use anyhow::{Context, anyhow}; use futures::StreamExt; +use futures::future::Either; use pageserver_api::shard::ShardIdentity; use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend}; -use postgres_ffi::waldecoder::WalDecodeError; -use postgres_ffi::{get_current_timestamp, waldecoder::WalStreamDecoder}; +use postgres_ffi::get_current_timestamp; +use postgres_ffi::waldecoder::{WalDecodeError, WalStreamDecoder}; use pq_proto::{BeMessage, InterpretedWalRecordsBody, WalSndKeepAlive}; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::sync::mpsc::error::SendError; use tokio::task::JoinHandle; use tokio::time::MissedTickBehavior; -use tracing::{error, info, info_span, Instrument}; +use tracing::{Instrument, error, info, info_span}; use utils::critical; use utils::lsn::Lsn; -use utils::postgres_client::Compression; -use 
utils::postgres_client::InterpretedFormat; +use utils::postgres_client::{Compression, InterpretedFormat}; use wal_decoder::models::{InterpretedWalRecord, InterpretedWalRecords}; use wal_decoder::wire_format::ToWireFormat; @@ -691,22 +690,20 @@ impl InterpretedWalSender<'_, IO> { } #[cfg(test)] mod tests { - use std::{collections::HashMap, str::FromStr, time::Duration}; + use std::collections::HashMap; + use std::str::FromStr; + use std::time::Duration; use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; use postgres_ffi::MAX_SEND_SIZE; use tokio::sync::mpsc::error::TryRecvError; - use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, - shard::{ShardCount, ShardNumber}, - }; + use utils::id::{NodeId, TenantTimelineId}; + use utils::lsn::Lsn; + use utils::shard::{ShardCount, ShardNumber}; - use crate::{ - send_interpreted_wal::{AttachShardNotification, Batch, InterpretedWalReader}, - test_utils::Env, - wal_reader_stream::StreamingWalReader, - }; + use crate::send_interpreted_wal::{AttachShardNotification, Batch, InterpretedWalReader}; + use crate::test_utils::Env; + use crate::wal_reader_stream::StreamingWalReader; #[tokio::test] async fn test_interpreted_wal_reader_fanout() { @@ -808,9 +805,11 @@ mod tests { // This test uses logical messages. Those only go to shard 0. Check that the // filtering worked and shard 1 did not get any. - assert!(shard_1_interpreted_records - .iter() - .all(|recs| recs.records.is_empty())); + assert!( + shard_1_interpreted_records + .iter() + .all(|recs| recs.records.is_empty()) + ); // Shard 0 should not receive anything more since the reader is // going through wal that it has already processed. diff --git a/safekeeper/src/send_wal.rs b/safekeeper/src/send_wal.rs index 72b1fd9fc3..33e3d0485c 100644 --- a/safekeeper/src/send_wal.rs +++ b/safekeeper/src/send_wal.rs @@ -1,6 +1,34 @@ //! This module implements the streaming side of replication protocol, starting //! 
with the "START_REPLICATION" message, and registry of walsenders. +use std::cmp::{max, min}; +use std::net::SocketAddr; +use std::sync::Arc; +use std::time::Duration; + +use anyhow::{Context as AnyhowContext, bail}; +use bytes::Bytes; +use futures::FutureExt; +use itertools::Itertools; +use parking_lot::Mutex; +use postgres_backend::{CopyStreamHandlerEnd, PostgresBackend, PostgresBackendReader, QueryError}; +use postgres_ffi::{MAX_SEND_SIZE, TimestampTz, get_current_timestamp}; +use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; +use safekeeper_api::Term; +use safekeeper_api::models::{ + HotStandbyFeedback, INVALID_FULL_TRANSACTION_ID, ReplicationFeedback, StandbyFeedback, + StandbyReply, +}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio::sync::watch::Receiver; +use tokio::time::timeout; +use tracing::*; +use utils::bin_ser::BeSer; +use utils::failpoint_support; +use utils::lsn::Lsn; +use utils::pageserver_feedback::PageserverFeedback; +use utils::postgres_client::PostgresClientProtocol; + use crate::handler::SafekeeperPostgresHandler; use crate::metrics::{RECEIVED_PS_FEEDBACKS, WAL_READERS}; use crate::receive_wal::WalReceivers; @@ -11,34 +39,6 @@ use crate::send_interpreted_wal::{ use crate::timeline::WalResidentTimeline; use crate::wal_reader_stream::StreamingWalReader; use crate::wal_storage::WalReader; -use anyhow::{bail, Context as AnyhowContext}; -use bytes::Bytes; -use futures::FutureExt; -use parking_lot::Mutex; -use postgres_backend::PostgresBackend; -use postgres_backend::{CopyStreamHandlerEnd, PostgresBackendReader, QueryError}; -use postgres_ffi::get_current_timestamp; -use postgres_ffi::{TimestampTz, MAX_SEND_SIZE}; -use pq_proto::{BeMessage, WalSndKeepAlive, XLogDataBody}; -use safekeeper_api::models::{ - HotStandbyFeedback, ReplicationFeedback, StandbyFeedback, StandbyReply, - INVALID_FULL_TRANSACTION_ID, -}; -use safekeeper_api::Term; -use tokio::io::{AsyncRead, AsyncWrite}; -use utils::failpoint_support; -use 
utils::pageserver_feedback::PageserverFeedback; -use utils::postgres_client::PostgresClientProtocol; - -use itertools::Itertools; -use std::cmp::{max, min}; -use std::net::SocketAddr; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::watch::Receiver; -use tokio::time::timeout; -use tracing::*; -use utils::{bin_ser::BeSer, lsn::Lsn}; // See: https://www.postgresql.org/docs/13/protocol-replication.html const HOT_STANDBY_FEEDBACK_TAG_BYTE: u8 = b'h'; @@ -906,9 +906,9 @@ impl WalSender<'_, IO> { // pageserver to identify WalReceiverError::SuccessfulCompletion, // do not change this string without updating pageserver. return Err(CopyStreamHandlerEnd::ServerInitiated(format!( - "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", - self.appname, self.start_pos, - ))); + "ending streaming to {:?} at {}, receiver is caughtup and there is no computes", + self.appname, self.start_pos, + ))); } } } diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs index 4d566b12a0..e437e6d2cd 100644 --- a/safekeeper/src/state.rs +++ b/safekeeper/src/state.rs @@ -1,28 +1,24 @@ //! Defines per timeline data stored persistently (SafeKeeperPersistentState) //! and its wrapper with in memory layer (SafekeeperState). 
-use std::{cmp::max, ops::Deref, time::SystemTime}; +use std::cmp::max; +use std::ops::Deref; +use std::time::SystemTime; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use postgres_ffi::WAL_SEGMENT_SIZE; -use safekeeper_api::{ - membership::Configuration, - models::{TimelineMembershipSwitchResponse, TimelineTermBumpResponse}, - ServerInfo, Term, INITIAL_TERM, -}; +use safekeeper_api::membership::Configuration; +use safekeeper_api::models::{TimelineMembershipSwitchResponse, TimelineTermBumpResponse}; +use safekeeper_api::{INITIAL_TERM, ServerInfo, Term}; use serde::{Deserialize, Serialize}; use tracing::info; -use utils::{ - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; -use crate::{ - control_file, - safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn, UNKNOWN_SERVER_VERSION}, - timeline::TimelineError, - wal_backup_partial::{self}, -}; +use crate::control_file; +use crate::safekeeper::{AcceptorState, PgUuid, TermHistory, TermLsn, UNKNOWN_SERVER_VERSION}; +use crate::timeline::TimelineError; +use crate::wal_backup_partial::{self}; /// Persistent information stored on safekeeper node about timeline. /// On disk data is prefixed by magic and format version and followed by checksum. 
diff --git a/safekeeper/src/test_utils.rs b/safekeeper/src/test_utils.rs index 32af4537d3..e6f74185c1 100644 --- a/safekeeper/src/test_utils.rs +++ b/safekeeper/src/test_utils.rs @@ -1,5 +1,12 @@ use std::sync::Arc; +use camino_tempfile::Utf8TempDir; +use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; +use safekeeper_api::membership::SafekeeperGeneration as Generation; +use tokio::fs::create_dir_all; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; + use crate::rate_limit::RateLimiter; use crate::receive_wal::WalAcceptor; use crate::safekeeper::{ @@ -8,16 +15,10 @@ use crate::safekeeper::{ }; use crate::send_wal::EndWatch; use crate::state::{TimelinePersistentState, TimelineState}; -use crate::timeline::{get_timeline_dir, SharedState, StateSK, Timeline}; +use crate::timeline::{SharedState, StateSK, Timeline, get_timeline_dir}; use crate::timelines_set::TimelinesSet; use crate::wal_backup::remote_timeline_path; -use crate::{control_file, receive_wal, wal_storage, SafeKeeperConf}; -use camino_tempfile::Utf8TempDir; -use postgres_ffi::v17::wal_generator::{LogicalMessageGenerator, WalGenerator}; -use safekeeper_api::membership::SafekeeperGeneration as Generation; -use tokio::fs::create_dir_all; -use utils::id::{NodeId, TenantTimelineId}; -use utils::lsn::Lsn; +use crate::{SafeKeeperConf, control_file, receive_wal, wal_storage}; /// A Safekeeper testing or benchmarking environment. Uses a tempdir for storage, removed on drop. pub struct Env { diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 4341f13824..c140f16ced 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -1,37 +1,32 @@ //! This module implements Timeline lifecycle management and has all necessary code //! to glue together SafeKeeper and all other background services. 
-use anyhow::{anyhow, bail, Result}; +use std::cmp::max; +use std::ops::{Deref, DerefMut}; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::time::Duration; + +use anyhow::{Result, anyhow, bail}; use camino::{Utf8Path, Utf8PathBuf}; +use http_utils::error::ApiError; use remote_storage::RemotePath; +use safekeeper_api::Term; use safekeeper_api::membership::Configuration; use safekeeper_api::models::{ PeerInfo, TimelineMembershipSwitchResponse, TimelineTermBumpResponse, }; -use safekeeper_api::Term; +use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; use tokio::fs::{self}; +use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard, watch}; +use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use utils::id::TenantId; +use tracing::*; +use utils::id::{NodeId, TenantId, TenantTimelineId}; +use utils::lsn::Lsn; use utils::sync::gate::Gate; -use http_utils::error::ApiError; -use std::cmp::max; -use std::ops::{Deref, DerefMut}; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::sync::Arc; -use std::time::Duration; -use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; -use tokio::{sync::watch, time::Instant}; -use tracing::*; -use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, -}; - -use storage_broker::proto::SafekeeperTimelineInfo; -use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; - -use crate::control_file; +use crate::metrics::{FullTimelineInfo, MISC_OPERATION_SECONDS, WalStorageMetrics}; use crate::rate_limit::RateLimiter; use crate::receive_wal::WalReceivers; use crate::safekeeper::{AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, TermLsn}; @@ -42,11 +37,8 @@ use crate::timeline_manager::{AtomicStatus, ManagerCtl}; use crate::timelines_set::TimelinesSet; use crate::wal_backup::{self, remote_timeline_path}; use crate::wal_backup_partial::PartialRemoteSegment; - -use crate::metrics::{FullTimelineInfo, 
WalStorageMetrics, MISC_OPERATION_SECONDS}; use crate::wal_storage::{Storage as wal_storage_iface, WalReader}; -use crate::SafeKeeperConf; -use crate::{debug_dump, timeline_manager, wal_storage}; +use crate::{SafeKeeperConf, control_file, debug_dump, timeline_manager, wal_storage}; fn peer_info_from_sk_info(sk_info: &SafekeeperTimelineInfo, ts: Instant) -> PeerInfo { PeerInfo { @@ -168,7 +160,7 @@ impl StateSK { pub fn state(&self) -> &TimelineState { match self { StateSK::Loaded(sk) => &sk.state, - StateSK::Offloaded(ref s) => s, + StateSK::Offloaded(s) => s, StateSK::Empty => unreachable!(), } } @@ -176,7 +168,7 @@ impl StateSK { pub fn state_mut(&mut self) -> &mut TimelineState { match self { StateSK::Loaded(sk) => &mut sk.state, - StateSK::Offloaded(ref mut s) => s, + StateSK::Offloaded(s) => s, StateSK::Empty => unreachable!(), } } diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index 303421c837..06ccb32d03 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -7,23 +7,19 @@ use anyhow::Context; use camino::Utf8PathBuf; use remote_storage::RemotePath; -use tokio::{ - fs::File, - io::{AsyncRead, AsyncWriteExt}, -}; +use tokio::fs::File; +use tokio::io::{AsyncRead, AsyncWriteExt}; use tracing::{debug, info, instrument, warn}; use utils::crashsafe::durable_rename; -use crate::{ - metrics::{ - EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, NUM_EVICTED_TIMELINES, - }, - rate_limit::rand_duration, - timeline_manager::{Manager, StateSnapshot}, - wal_backup, - wal_backup_partial::{self, PartialRemoteSegment}, - wal_storage::wal_file_paths, +use crate::metrics::{ + EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED, EvictionEvent, NUM_EVICTED_TIMELINES, }; +use crate::rate_limit::rand_duration; +use crate::timeline_manager::{Manager, StateSnapshot}; +use crate::wal_backup; +use crate::wal_backup_partial::{self, PartialRemoteSegment}; +use 
crate::wal_storage::wal_file_paths; impl Manager { /// Returns true if the timeline is ready for eviction. diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index a33994dcab..71e99a4de7 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -7,41 +7,36 @@ //! Be aware that you need to be extra careful with manager code, because it is not respawned on panic. //! Also, if it will stuck in some branch, it will prevent any further progress in the timeline. -use std::{ - sync::{atomic::AtomicUsize, Arc}, - time::Duration, -}; +use std::sync::Arc; +use std::sync::atomic::AtomicUsize; +use std::time::Duration; use futures::channel::oneshot; use postgres_ffi::XLogSegNo; -use safekeeper_api::{models::PeerInfo, Term}; +use safekeeper_api::Term; +use safekeeper_api::models::PeerInfo; use serde::{Deserialize, Serialize}; -use tokio::{ - task::{JoinError, JoinHandle}, - time::Instant, -}; +use tokio::task::{JoinError, JoinHandle}; +use tokio::time::Instant; use tokio_util::sync::CancellationToken; -use tracing::{debug, info, info_span, instrument, warn, Instrument}; +use tracing::{Instrument, debug, info, info_span, instrument, warn}; use utils::lsn::Lsn; -use crate::{ - control_file::{FileStorage, Storage}, - metrics::{ - MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS, - NUM_EVICTED_TIMELINES, - }, - rate_limit::{rand_duration, RateLimiter}, - recovery::recovery_main, - remove_wal::calc_horizon_lsn, - send_wal::WalSenders, - state::TimelineState, - timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}, - timeline_guard::{AccessService, GuardId, ResidenceGuard}, - timelines_set::{TimelineSetGuard, TimelinesSet}, - wal_backup::{self, WalBackupTaskHandle}, - wal_backup_partial::{self, PartialBackup, PartialRemoteSegment}, - SafeKeeperConf, +use crate::SafeKeeperConf; +use crate::control_file::{FileStorage, Storage}; +use crate::metrics::{ + 
MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS, NUM_EVICTED_TIMELINES, }; +use crate::rate_limit::{RateLimiter, rand_duration}; +use crate::recovery::recovery_main; +use crate::remove_wal::calc_horizon_lsn; +use crate::send_wal::WalSenders; +use crate::state::TimelineState; +use crate::timeline::{ManagerTimeline, ReadGuardSharedState, StateSK, WalResidentTimeline}; +use crate::timeline_guard::{AccessService, GuardId, ResidenceGuard}; +use crate::timelines_set::{TimelineSetGuard, TimelinesSet}; +use crate::wal_backup::{self, WalBackupTaskHandle}; +use crate::wal_backup_partial::{self, PartialBackup, PartialRemoteSegment}; pub(crate) struct StateSnapshot { // inmem values diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 1ff6a72bce..1d29030711 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -2,31 +2,33 @@ //! All timelines should always be present in this map, this is done by loading them //! all from the disk on startup and keeping them in memory. 
-use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; -use crate::rate_limit::RateLimiter; -use crate::state::TimelinePersistentState; -use crate::timeline::{get_tenant_dir, get_timeline_dir, Timeline, TimelineError}; -use crate::timelines_set::TimelinesSet; -use crate::wal_storage::Storage; -use crate::{control_file, wal_storage, SafeKeeperConf}; -use anyhow::{bail, Context, Result}; -use camino::Utf8PathBuf; -use camino_tempfile::Utf8TempDir; -use safekeeper_api::membership::Configuration; -use safekeeper_api::models::SafekeeperUtilization; -use safekeeper_api::ServerInfo; -use serde::Serialize; use std::collections::HashMap; use std::str::FromStr; use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; + +use anyhow::{Context, Result, bail}; +use camino::Utf8PathBuf; +use camino_tempfile::Utf8TempDir; +use safekeeper_api::ServerInfo; +use safekeeper_api::membership::Configuration; +use safekeeper_api::models::SafekeeperUtilization; +use serde::Serialize; use tokio::fs; use tracing::*; use utils::crashsafe::{durable_rename, fsync_async_opt}; use utils::id::{TenantId, TenantTimelineId, TimelineId}; use utils::lsn::Lsn; +use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; +use crate::rate_limit::RateLimiter; +use crate::state::TimelinePersistentState; +use crate::timeline::{Timeline, TimelineError, get_tenant_dir, get_timeline_dir}; +use crate::timelines_set::TimelinesSet; +use crate::wal_storage::Storage; +use crate::{SafeKeeperConf, control_file, wal_storage}; + // Timeline entry in the global map: either a ready timeline, or mark that it is // being created. 
#[derive(Clone)] diff --git a/safekeeper/src/timelines_set.rs b/safekeeper/src/timelines_set.rs index 096e348295..1d1abc530f 100644 --- a/safekeeper/src/timelines_set.rs +++ b/safekeeper/src/timelines_set.rs @@ -1,4 +1,5 @@ -use std::{collections::HashMap, sync::Arc}; +use std::collections::HashMap; +use std::sync::Arc; use utils::id::TenantTimelineId; diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 2f6b91cf47..6176e64698 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -1,34 +1,29 @@ -use anyhow::{Context, Result}; - -use camino::{Utf8Path, Utf8PathBuf}; -use futures::stream::FuturesOrdered; -use futures::StreamExt; -use safekeeper_api::models::PeerInfo; -use tokio::task::JoinHandle; -use tokio_util::sync::CancellationToken; -use utils::backoff; -use utils::id::NodeId; - use std::cmp::min; use std::collections::HashSet; use std::num::NonZeroU32; use std::pin::Pin; use std::time::Duration; +use anyhow::{Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::StreamExt; +use futures::stream::FuturesOrdered; use postgres_ffi::v14::xlog_utils::XLogSegNoOffsetToRecPtr; -use postgres_ffi::XLogFileName; -use postgres_ffi::{XLogSegNo, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; use remote_storage::{ DownloadOpts, GenericRemoteStorage, ListingMode, RemotePath, StorageMetadata, }; +use safekeeper_api::models::PeerInfo; use tokio::fs::File; - use tokio::select; use tokio::sync::mpsc::{self, Receiver, Sender}; -use tokio::sync::{watch, OnceCell}; +use tokio::sync::{OnceCell, watch}; +use tokio::task::JoinHandle; +use tokio_util::sync::CancellationToken; use tracing::*; - -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::backoff; +use utils::id::{NodeId, TenantTimelineId}; +use utils::lsn::Lsn; use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS}; use crate::timeline::WalResidentTimeline; diff --git a/safekeeper/src/wal_backup_partial.rs 
b/safekeeper/src/wal_backup_partial.rs index 5ecb23e8e0..049852a048 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -20,23 +20,23 @@ //! This way control file stores information about all potentially existing //! remote partial segments and can clean them up after uploading a newer version. use camino::Utf8PathBuf; -use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; use remote_storage::RemotePath; use safekeeper_api::Term; use serde::{Deserialize, Serialize}; - use tokio_util::sync::CancellationToken; use tracing::{debug, error, info, instrument, warn}; -use utils::{id::NodeId, lsn::Lsn}; +use utils::id::NodeId; +use utils::lsn::Lsn; -use crate::{ - metrics::{MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS}, - rate_limit::{rand_duration, RateLimiter}, - timeline::WalResidentTimeline, - timeline_manager::StateSnapshot, - wal_backup::{self}, - SafeKeeperConf, +use crate::SafeKeeperConf; +use crate::metrics::{ + MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS, }; +use crate::rate_limit::{RateLimiter, rand_duration}; +use crate::timeline::WalResidentTimeline; +use crate::timeline_manager::StateSnapshot; +use crate::wal_backup::{self}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum UploadStatus { diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index a0dd571a34..cc9d4e6e3b 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -1,14 +1,15 @@ -use std::{ - pin::Pin, - task::{Context, Poll}, -}; +use std::pin::Pin; +use std::task::{Context, Poll}; use bytes::Bytes; -use futures::{stream::BoxStream, Stream, StreamExt}; +use futures::stream::BoxStream; +use futures::{Stream, StreamExt}; +use safekeeper_api::Term; use utils::lsn::Lsn; -use crate::{send_wal::EndWatch, timeline::WalResidentTimeline, 
wal_storage::WalReader}; -use safekeeper_api::Term; +use crate::send_wal::EndWatch; +use crate::timeline::WalResidentTimeline; +use crate::wal_storage::WalReader; #[derive(PartialEq, Eq, Debug)] pub(crate) struct WalBytes { @@ -224,12 +225,11 @@ mod tests { use futures::StreamExt; use postgres_ffi::MAX_SEND_SIZE; - use utils::{ - id::{NodeId, TenantTimelineId}, - lsn::Lsn, - }; + use utils::id::{NodeId, TenantTimelineId}; + use utils::lsn::Lsn; - use crate::{test_utils::Env, wal_reader_stream::StreamingWalReader}; + use crate::test_utils::Env; + use crate::wal_reader_stream::StreamingWalReader; #[tokio::test] async fn test_streaming_wal_reader_reset() { diff --git a/safekeeper/src/wal_service.rs b/safekeeper/src/wal_service.rs index e5ccbb3230..045fa88cb0 100644 --- a/safekeeper/src/wal_service.rs +++ b/safekeeper/src/wal_service.rs @@ -2,23 +2,23 @@ //! WAL service listens for client connections and //! receive WAL from wal_proposer and send it to WAL receivers //! -use anyhow::{Context, Result}; -use postgres_backend::QueryError; -use safekeeper_api::models::ConnectionId; +use std::os::fd::AsRawFd; use std::sync::Arc; use std::time::Duration; + +use anyhow::{Context, Result}; +use postgres_backend::{AuthType, PostgresBackend, QueryError}; +use safekeeper_api::models::ConnectionId; use tokio::net::TcpStream; use tokio_io_timeout::TimeoutReader; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::{auth::Scope, measured_stream::MeasuredStream}; - -use std::os::fd::AsRawFd; +use utils::auth::Scope; +use utils::measured_stream::MeasuredStream; +use crate::handler::SafekeeperPostgresHandler; use crate::metrics::TrafficMetrics; -use crate::SafeKeeperConf; -use crate::{handler::SafekeeperPostgresHandler, GlobalTimelines}; -use postgres_backend::{AuthType, PostgresBackend}; +use crate::{GlobalTimelines, SafeKeeperConf}; /// Accept incoming TCP connections and spawn them into a background thread. 
/// diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index e338d70731..ed197a3f83 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -7,32 +7,32 @@ //! //! Note that last file has `.partial` suffix, that's different from postgres. -use anyhow::{bail, Context, Result}; -use bytes::Bytes; -use camino::{Utf8Path, Utf8PathBuf}; -use futures::future::BoxFuture; -use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName}; -use postgres_ffi::{dispatch_pgversion, XLogSegNo, PG_TLI}; -use remote_storage::RemotePath; use std::cmp::{max, min}; use std::future::Future; use std::io::{self, SeekFrom}; use std::pin::Pin; -use tokio::fs::{self, remove_file, File, OpenOptions}; -use tokio::io::{AsyncRead, AsyncWriteExt}; -use tokio::io::{AsyncReadExt, AsyncSeekExt}; + +use anyhow::{Context, Result, bail}; +use bytes::Bytes; +use camino::{Utf8Path, Utf8PathBuf}; +use futures::future::BoxFuture; +use postgres_ffi::v14::xlog_utils::{IsPartialXLogFileName, IsXLogFileName, XLogFromFileName}; +use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo, dispatch_pgversion}; +use pq_proto::SystemId; +use remote_storage::RemotePath; +use tokio::fs::{self, File, OpenOptions, remove_file}; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; use tracing::*; use utils::crashsafe::durable_rename; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; use crate::metrics::{ - time_io_closure, WalStorageMetrics, REMOVED_WAL_SEGMENTS, WAL_STORAGE_OPERATION_SECONDS, + REMOVED_WAL_SEGMENTS, WAL_STORAGE_OPERATION_SECONDS, WalStorageMetrics, time_io_closure, }; use crate::state::TimelinePersistentState; use crate::wal_backup::{read_object, remote_timeline_path}; -use postgres_ffi::waldecoder::WalStreamDecoder; -use postgres_ffi::XLogFileName; -use pq_proto::SystemId; -use utils::{id::TenantTimelineId, lsn::Lsn}; pub trait Storage { // Last written LSN. 
@@ -200,7 +200,12 @@ impl PhysicalStorage { ttid.timeline_id, flush_lsn, state.commit_lsn, state.peer_horizon_lsn, ); if flush_lsn < state.commit_lsn { - bail!("timeline {} potential data loss: flush_lsn {} by find_end_of_wal is less than commit_lsn {} from control file", ttid.timeline_id, flush_lsn, state.commit_lsn); + bail!( + "timeline {} potential data loss: flush_lsn {} by find_end_of_wal is less than commit_lsn {} from control file", + ttid.timeline_id, + flush_lsn, + state.commit_lsn + ); } if flush_lsn < state.peer_horizon_lsn { warn!( diff --git a/safekeeper/tests/misc_test.rs b/safekeeper/tests/misc_test.rs index 8e5b17a143..8e54d2bb86 100644 --- a/safekeeper/tests/misc_test.rs +++ b/safekeeper/tests/misc_test.rs @@ -3,9 +3,9 @@ use std::sync::Arc; use tracing::{info, warn}; use utils::lsn::Lsn; -use crate::walproposer_sim::{ - log::{init_logger, init_tracing_logger}, - simulation::{generate_network_opts, generate_schedule, Schedule, TestAction, TestConfig}, +use crate::walproposer_sim::log::{init_logger, init_tracing_logger}; +use crate::walproposer_sim::simulation::{ + Schedule, TestAction, TestConfig, generate_network_opts, generate_schedule, }; pub mod walproposer_sim; diff --git a/safekeeper/tests/random_test.rs b/safekeeper/tests/random_test.rs index 1a932ef699..e29b58836a 100644 --- a/safekeeper/tests/random_test.rs +++ b/safekeeper/tests/random_test.rs @@ -1,11 +1,9 @@ use rand::Rng; use tracing::{info, warn}; -use crate::walproposer_sim::{ - log::{init_logger, init_tracing_logger}, - simulation::{generate_network_opts, generate_schedule, TestConfig}, - simulation_logs::validate_events, -}; +use crate::walproposer_sim::log::{init_logger, init_tracing_logger}; +use crate::walproposer_sim::simulation::{TestConfig, generate_network_opts, generate_schedule}; +use crate::walproposer_sim::simulation_logs::validate_events; pub mod walproposer_sim; @@ -18,7 +16,7 @@ fn test_random_schedules() -> anyhow::Result<()> { let mut config = 
TestConfig::new(Some(clock)); for _ in 0..500 { - let seed: u64 = rand::thread_rng().gen(); + let seed: u64 = rand::thread_rng().r#gen(); config.network = generate_network_opts(seed); let test = config.start(seed); diff --git a/safekeeper/tests/simple_test.rs b/safekeeper/tests/simple_test.rs index 0be9d0deef..f7b266e39c 100644 --- a/safekeeper/tests/simple_test.rs +++ b/safekeeper/tests/simple_test.rs @@ -1,7 +1,8 @@ use tracing::info; use utils::lsn::Lsn; -use crate::walproposer_sim::{log::init_logger, simulation::TestConfig}; +use crate::walproposer_sim::log::init_logger; +use crate::walproposer_sim::simulation::TestConfig; pub mod walproposer_sim; diff --git a/safekeeper/tests/walproposer_sim/log.rs b/safekeeper/tests/walproposer_sim/log.rs index 870f30de4f..e2ba3282ca 100644 --- a/safekeeper/tests/walproposer_sim/log.rs +++ b/safekeeper/tests/walproposer_sim/log.rs @@ -1,9 +1,11 @@ -use std::{fmt, sync::Arc}; +use std::fmt; +use std::sync::Arc; use desim::time::Timing; use once_cell::sync::OnceCell; use parking_lot::Mutex; -use tracing_subscriber::fmt::{format::Writer, time::FormatTime}; +use tracing_subscriber::fmt::format::Writer; +use tracing_subscriber::fmt::time::FormatTime; /// SimClock can be plugged into tracing logger to print simulation time. #[derive(Clone)] diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index b9dfabe0d7..6ce1a9940e 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -2,31 +2,30 @@ //! Gets messages from the network, passes them down to consensus module and //! sends replies back. 
-use std::{collections::HashMap, sync::Arc, time::Duration}; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Duration; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; use bytes::{Bytes, BytesMut}; use camino::Utf8PathBuf; -use desim::{ - executor::{self, PollSome}, - network::TCP, - node_os::NodeOs, - proto::{AnyMessage, NetEvent, NodeEvent}, -}; +use desim::executor::{self, PollSome}; +use desim::network::TCP; +use desim::node_os::NodeOs; +use desim::proto::{AnyMessage, NetEvent, NodeEvent}; use http::Uri; -use safekeeper::{ - safekeeper::{ProposerAcceptorMessage, SafeKeeper, SK_PROTO_VERSION_3, UNKNOWN_SERVER_VERSION}, - state::{TimelinePersistentState, TimelineState}, - timeline::TimelineError, - wal_storage::Storage, - SafeKeeperConf, +use safekeeper::SafeKeeperConf; +use safekeeper::safekeeper::{ + ProposerAcceptorMessage, SK_PROTO_VERSION_3, SafeKeeper, UNKNOWN_SERVER_VERSION, }; -use safekeeper_api::{membership::Configuration, ServerInfo}; +use safekeeper::state::{TimelinePersistentState, TimelineState}; +use safekeeper::timeline::TimelineError; +use safekeeper::wal_storage::Storage; +use safekeeper_api::ServerInfo; +use safekeeper_api::membership::Configuration; use tracing::{debug, info_span, warn}; -use utils::{ - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; use super::safekeeper_disk::{DiskStateStorage, DiskWALStorage, SafekeeperDisk, TimelineDisk}; diff --git a/safekeeper/tests/walproposer_sim/safekeeper_disk.rs b/safekeeper/tests/walproposer_sim/safekeeper_disk.rs index b854754ecf..94a849b5f0 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper_disk.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper_disk.rs @@ -1,22 +1,23 @@ use std::collections::HashMap; +use std::ops::Deref; use std::sync::Arc; - -use parking_lot::Mutex; -use safekeeper::state::TimelinePersistentState; -use utils::id::TenantTimelineId; 
- -use super::block_storage::BlockStorage; - -use std::{ops::Deref, time::Instant}; +use std::time::Instant; use anyhow::Result; use bytes::{Buf, BytesMut}; use futures::future::BoxFuture; -use postgres_ffi::{waldecoder::WalStreamDecoder, XLogSegNo}; -use safekeeper::{control_file, metrics::WalStorageMetrics, wal_storage}; +use parking_lot::Mutex; +use postgres_ffi::XLogSegNo; +use postgres_ffi::waldecoder::WalStreamDecoder; +use safekeeper::metrics::WalStorageMetrics; +use safekeeper::state::TimelinePersistentState; +use safekeeper::{control_file, wal_storage}; use tracing::{debug, info}; +use utils::id::TenantTimelineId; use utils::lsn::Lsn; +use super::block_storage::BlockStorage; + /// All safekeeper state that is usually saved to disk. pub struct SafekeeperDisk { pub timelines: Mutex>>, diff --git a/safekeeper/tests/walproposer_sim/simulation.rs b/safekeeper/tests/walproposer_sim/simulation.rs index fabf450eef..f314143952 100644 --- a/safekeeper/tests/walproposer_sim/simulation.rs +++ b/safekeeper/tests/walproposer_sim/simulation.rs @@ -1,23 +1,24 @@ -use std::{cell::Cell, str::FromStr, sync::Arc}; +use std::cell::Cell; +use std::str::FromStr; +use std::sync::Arc; -use crate::walproposer_sim::{safekeeper::run_server, walproposer_api::SimulationApi}; -use desim::{ - executor::{self, ExternalHandle}, - node_os::NodeOs, - options::{Delay, NetworkOptions}, - proto::{AnyMessage, NodeEvent}, - world::Node, - world::World, -}; +use desim::executor::{self, ExternalHandle}; +use desim::node_os::NodeOs; +use desim::options::{Delay, NetworkOptions}; +use desim::proto::{AnyMessage, NodeEvent}; +use desim::world::{Node, World}; use rand::{Rng, SeedableRng}; use tracing::{debug, info_span, warn}; -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; use walproposer::walproposer::{Config, Wrapper}; -use super::{ - log::SimClock, safekeeper_disk::SafekeeperDisk, walproposer_api, - walproposer_disk::DiskWalProposer, -}; +use 
super::log::SimClock; +use super::safekeeper_disk::SafekeeperDisk; +use super::walproposer_api; +use super::walproposer_disk::DiskWalProposer; +use crate::walproposer_sim::safekeeper::run_server; +use crate::walproposer_sim::walproposer_api::SimulationApi; /// Simulated safekeeper node. pub struct SafekeeperNode { diff --git a/safekeeper/tests/walproposer_sim/walproposer_api.rs b/safekeeper/tests/walproposer_sim/walproposer_api.rs index 5578c94cf6..6451589e80 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_api.rs +++ b/safekeeper/tests/walproposer_sim/walproposer_api.rs @@ -1,26 +1,20 @@ -use std::{ - cell::{RefCell, RefMut, UnsafeCell}, - ffi::CStr, - sync::Arc, -}; +use std::cell::{RefCell, RefMut, UnsafeCell}; +use std::ffi::CStr; +use std::sync::Arc; use bytes::Bytes; -use desim::{ - executor::{self, PollSome}, - network::TCP, - node_os::NodeOs, - proto::{AnyMessage, NetEvent, NodeEvent}, - world::NodeId, -}; +use desim::executor::{self, PollSome}; +use desim::network::TCP; +use desim::node_os::NodeOs; +use desim::proto::{AnyMessage, NetEvent, NodeEvent}; +use desim::world::NodeId; use tracing::debug; use utils::lsn::Lsn; -use walproposer::{ - api_bindings::Level, - bindings::{ - NeonWALReadResult, SafekeeperStateDesiredEvents, WL_SOCKET_READABLE, WL_SOCKET_WRITEABLE, - }, - walproposer::{ApiImpl, Config}, +use walproposer::api_bindings::Level; +use walproposer::bindings::{ + NeonWALReadResult, SafekeeperStateDesiredEvents, WL_SOCKET_READABLE, WL_SOCKET_WRITEABLE, }; +use walproposer::walproposer::{ApiImpl, Config}; use super::walproposer_disk::DiskWalProposer; @@ -578,7 +572,9 @@ impl ApiImpl for SimulationApi { let disk_lsn = disk.lock().flush_rec_ptr().0; debug!("start_streaming at {} (disk_lsn={})", startpos, disk_lsn); if startpos < disk_lsn { - debug!("startpos < disk_lsn, it means we wrote some transaction even before streaming started"); + debug!( + "startpos < disk_lsn, it means we wrote some transaction even before streaming started" + ); } 
assert!(startpos <= disk_lsn); let mut broadcasted = Lsn(startpos); diff --git a/safekeeper/tests/walproposer_sim/walproposer_disk.rs b/safekeeper/tests/walproposer_sim/walproposer_disk.rs index 7dc7f48548..fe3eee8a5a 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_disk.rs +++ b/safekeeper/tests/walproposer_sim/walproposer_disk.rs @@ -1,4 +1,5 @@ -use std::{ffi::CStr, sync::Arc}; +use std::ffi::CStr; +use std::sync::Arc; use parking_lot::{Mutex, MutexGuard}; use postgres_ffi::v16::wal_generator::{LogicalMessageGenerator, WalGenerator}; diff --git a/storage_broker/Cargo.toml b/storage_broker/Cargo.toml index 17d4aed63b..e4db9a317d 100644 --- a/storage_broker/Cargo.toml +++ b/storage_broker/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_broker" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/storage_broker/benches/rps.rs b/storage_broker/benches/rps.rs index 1a6fb7fedf..86f2dd9a6c 100644 --- a/storage_broker/benches/rps.rs +++ b/storage_broker/benches/rps.rs @@ -1,18 +1,14 @@ -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{Duration, Instant}; use clap::Parser; - -use storage_broker::proto::SafekeeperTimelineInfo; use storage_broker::proto::{ - FilterTenantTimelineId, MessageType, SubscribeByFilterRequest, + FilterTenantTimelineId, MessageType, SafekeeperTimelineInfo, SubscribeByFilterRequest, TenantTimelineId as ProtoTenantTimelineId, TypeSubscription, TypedMessage, }; - use storage_broker::{BrokerClientChannel, DEFAULT_ENDPOINT}; use tokio::time; - use tonic::Request; const ABOUT: &str = r#" diff --git a/storage_broker/src/bin/storage_broker.rs b/storage_broker/src/bin/storage_broker.rs index 9d4c22484c..cc33ec20ff 100644 --- a/storage_broker/src/bin/storage_broker.rs +++ b/storage_broker/src/bin/storage_broker.rs @@ -10,7 +10,14 @@ //! //! 
Only safekeeper message is supported, but it is not hard to add something //! else with generics. -use clap::{command, Parser}; +use std::collections::HashMap; +use std::convert::Infallible; +use std::net::SocketAddr; +use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; + +use clap::{Parser, command}; use futures_core::Stream; use futures_util::StreamExt; use http_body_util::Full; @@ -19,27 +26,10 @@ use hyper::header::CONTENT_TYPE; use hyper::service::service_fn; use hyper::{Method, StatusCode}; use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer}; -use parking_lot::RwLock; -use std::collections::HashMap; -use std::convert::Infallible; -use std::net::SocketAddr; -use std::pin::Pin; -use std::sync::Arc; -use std::time::Duration; -use tokio::net::TcpListener; -use tokio::sync::broadcast; -use tokio::sync::broadcast::error::RecvError; -use tokio::time; -use tonic::body::{self, empty_body, BoxBody}; -use tonic::codegen::Service; -use tonic::Code; -use tonic::{Request, Response, Status}; -use tracing::*; -use utils::signals::ShutdownSignals; - use metrics::{Encoder, TextEncoder}; +use parking_lot::RwLock; use storage_broker::metrics::{ - BROADCASTED_MESSAGES_TOTAL, BROADCAST_DROPPED_MESSAGES_TOTAL, NUM_PUBS, NUM_SUBS_ALL, + BROADCAST_DROPPED_MESSAGES_TOTAL, BROADCASTED_MESSAGES_TOTAL, NUM_PUBS, NUM_SUBS_ALL, NUM_SUBS_TIMELINE, PROCESSED_MESSAGES_TOTAL, PUBLISHED_ONEOFF_MESSAGES_TOTAL, }; use storage_broker::proto::broker_service_server::{BrokerService, BrokerServiceServer}; @@ -48,10 +38,19 @@ use storage_broker::proto::{ FilterTenantTimelineId, MessageType, SafekeeperDiscoveryRequest, SafekeeperDiscoveryResponse, SafekeeperTimelineInfo, SubscribeByFilterRequest, SubscribeSafekeeperInfoRequest, TypedMessage, }; -use storage_broker::{parse_proto_ttid, DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_LISTEN_ADDR}; +use storage_broker::{DEFAULT_KEEPALIVE_INTERVAL, DEFAULT_LISTEN_ADDR, parse_proto_ttid}; +use tokio::net::TcpListener; +use tokio::sync::broadcast; +use 
tokio::sync::broadcast::error::RecvError; +use tokio::time; +use tonic::body::{self, BoxBody, empty_body}; +use tonic::codegen::Service; +use tonic::{Code, Request, Response, Status}; +use tracing::*; use utils::id::TenantTimelineId; use utils::logging::{self, LogFormat}; use utils::sentry_init::init_sentry; +use utils::signals::ShutdownSignals; use utils::{project_build_tag, project_git_version}; project_git_version!(GIT_VERSION); @@ -743,11 +742,12 @@ async fn main() -> Result<(), Box> { #[cfg(test)] mod tests { - use super::*; use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; use tokio::sync::broadcast::error::TryRecvError; use utils::id::{TenantId, TimelineId}; + use super::*; + fn msg(timeline_id: Vec) -> Message { Message::SafekeeperTimelineInfo(SafekeeperTimelineInfo { safekeeper_id: 1, diff --git a/storage_broker/src/lib.rs b/storage_broker/src/lib.rs index 3ac40f6e14..55d411f607 100644 --- a/storage_broker/src/lib.rs +++ b/storage_broker/src/lib.rs @@ -1,12 +1,11 @@ use std::time::Duration; -use tonic::codegen::StdError; -use tonic::transport::{ClientTlsConfig, Endpoint}; -use tonic::{transport::Channel, Status}; -use utils::id::{TenantId, TenantTimelineId, TimelineId}; -use proto::{ - broker_service_client::BrokerServiceClient, TenantTimelineId as ProtoTenantTimelineId, -}; +use proto::TenantTimelineId as ProtoTenantTimelineId; +use proto::broker_service_client::BrokerServiceClient; +use tonic::Status; +use tonic::codegen::StdError; +use tonic::transport::{Channel, ClientTlsConfig, Endpoint}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; // Code generated by protobuf. pub mod proto { @@ -20,11 +19,8 @@ pub mod proto { pub mod metrics; // Re-exports to avoid direct tonic dependency in user crates. 
-pub use tonic::Code; -pub use tonic::Request; -pub use tonic::Streaming; - pub use hyper::Uri; +pub use tonic::{Code, Request, Streaming}; pub const DEFAULT_LISTEN_ADDR: &str = "127.0.0.1:50051"; pub const DEFAULT_ENDPOINT: &str = const_format::formatcp!("http://{DEFAULT_LISTEN_ADDR}"); diff --git a/storage_broker/src/metrics.rs b/storage_broker/src/metrics.rs index 1fd3dd5ad6..ecfb594eba 100644 --- a/storage_broker/src/metrics.rs +++ b/storage_broker/src/metrics.rs @@ -1,6 +1,6 @@ //! Broker metrics. -use metrics::{register_int_counter, register_int_gauge, IntCounter, IntGauge}; +use metrics::{IntCounter, IntGauge, register_int_counter, register_int_gauge}; use once_cell::sync::Lazy; pub static NUM_PUBS: Lazy = Lazy::new(|| { diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 8e82996db1..b63ba154da 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_controller" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [[bin]] diff --git a/storage_controller/src/background_node_operations.rs b/storage_controller/src/background_node_operations.rs index 226d4942e7..a630316f46 100644 --- a/storage_controller/src/background_node_operations.rs +++ b/storage_controller/src/background_node_operations.rs @@ -1,4 +1,5 @@ -use std::{borrow::Cow, fmt::Debug, fmt::Display}; +use std::borrow::Cow; +use std::fmt::{Debug, Display}; use tokio_util::sync::CancellationToken; use utils::id::NodeId; diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index 5bc3c81f02..b602af362d 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -1,7 +1,8 @@ use std::borrow::Cow; +use std::collections::HashMap; use std::error::Error as _; use std::sync::Arc; -use std::{collections::HashMap, time::Duration}; +use std::time::Duration; use control_plane::endpoint::{ComputeControlPlane, 
EndpointStatus}; use control_plane::local_env::LocalEnv; @@ -12,11 +13,9 @@ use pageserver_api::shard::{ShardCount, ShardNumber, ShardStripeSize, TenantShar use postgres_connection::parse_host_port; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; -use utils::{ - backoff::{self}, - id::{NodeId, TenantId}, -}; +use tracing::{Instrument, info_span}; +use utils::backoff::{self}; +use utils::id::{NodeId, TenantId}; use crate::service::Config; diff --git a/storage_controller/src/drain_utils.rs b/storage_controller/src/drain_utils.rs index 8b7be88078..bd4b8ba38f 100644 --- a/storage_controller/src/drain_utils.rs +++ b/storage_controller/src/drain_utils.rs @@ -1,15 +1,14 @@ -use std::{ - collections::{BTreeMap, HashMap}, - sync::Arc, -}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; use pageserver_api::controller_api::{NodeSchedulingPolicy, ShardSchedulingPolicy}; -use utils::{id::NodeId, shard::TenantShardId}; +use utils::id::NodeId; +use utils::shard::TenantShardId; -use crate::{ - background_node_operations::OperationError, node::Node, scheduler::Scheduler, - tenant_shard::TenantShard, -}; +use crate::background_node_operations::OperationError; +use crate::node::Node; +use crate::scheduler::Scheduler; +use crate::tenant_shard::TenantShard; pub(crate) struct TenantShardIterator { tenants_accessor: F, @@ -188,10 +187,8 @@ impl TenantShardDrain { mod tests { use std::sync::Arc; - use utils::{ - id::TenantId, - shard::{ShardCount, ShardNumber, TenantShardId}, - }; + use utils::id::TenantId; + use utils::shard::{ShardCount, ShardNumber, TenantShardId}; use super::TenantShardIterator; diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs index 88ee7887d3..56a331becd 100644 --- a/storage_controller/src/heartbeater.rs +++ b/storage_controller/src/heartbeater.rs @@ -1,24 +1,22 @@ -use futures::{stream::FuturesUnordered, StreamExt}; +use 
std::collections::HashMap; +use std::fmt::Debug; +use std::future::Future; +use std::sync::Arc; +use std::time::{Duration, Instant}; + +use futures::StreamExt; +use futures::stream::FuturesUnordered; +use pageserver_api::controller_api::{NodeAvailability, SkSchedulingPolicy}; +use pageserver_api::models::PageserverUtilization; use safekeeper_api::models::SafekeeperUtilization; use safekeeper_client::mgmt_api; -use std::{ - collections::HashMap, - fmt::Debug, - future::Future, - sync::Arc, - time::{Duration, Instant}, -}; -use tokio_util::sync::CancellationToken; - -use pageserver_api::{ - controller_api::{NodeAvailability, SkSchedulingPolicy}, - models::PageserverUtilization, -}; - use thiserror::Error; -use utils::{id::NodeId, logging::SecretString}; +use tokio_util::sync::CancellationToken; +use utils::id::NodeId; +use utils::logging::SecretString; -use crate::{node::Node, safekeeper::Safekeeper}; +use crate::node::Node; +use crate::safekeeper::Safekeeper; struct HeartbeaterTask { receiver: tokio::sync::mpsc::UnboundedReceiver>, diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 33b3d88c25..5b5ae80eaf 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -1,32 +1,27 @@ -use crate::http; -use crate::metrics::{ - HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, PageserverRequestLabelGroup, - METRICS_REGISTRY, -}; -use crate::persistence::SafekeeperUpsert; -use crate::reconciler::ReconcileError; -use crate::service::{LeadershipStatus, Service, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT}; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use anyhow::Context; +use control_plane::storage_controller::{AttachHookRequest, InspectRequest}; use futures::Future; -use http_utils::{ - endpoint::{ - self, auth_middleware, check_permission_with, profile_cpu_handler, profile_heap_handler, - request_span, - }, - error::ApiError, - failpoints::failpoints_handler, - 
json::{json_request, json_response}, - request::{must_get_query_param, parse_query_param, parse_request_param}, - RequestExt, RouterBuilder, +use http_utils::endpoint::{ + self, auth_middleware, check_permission_with, profile_cpu_handler, profile_heap_handler, + request_span, }; +use http_utils::error::ApiError; +use http_utils::failpoints::failpoints_handler; +use http_utils::json::{json_request, json_response}; +use http_utils::request::{must_get_query_param, parse_query_param, parse_request_param}; +use http_utils::{RequestExt, RouterBuilder}; use hyper::header::CONTENT_TYPE; -use hyper::{Body, Request, Response}; -use hyper::{StatusCode, Uri}; +use hyper::{Body, Request, Response, StatusCode, Uri}; use metrics::{BuildInfo, NeonMetrics}; use pageserver_api::controller_api::{ MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse, MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse, - SafekeeperSchedulingPolicyRequest, ShardsPreferredAzsRequest, TenantCreateRequest, + NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, SafekeeperSchedulingPolicyRequest, + ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest, }; use pageserver_api::models::{ TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, @@ -34,23 +29,21 @@ use pageserver_api::models::{ TimelineCreateRequest, }; use pageserver_api::shard::TenantShardId; -use pageserver_client::{mgmt_api, BlockUnblock}; -use std::str::FromStr; -use std::sync::Arc; -use std::time::{Duration, Instant}; +use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; +use pageserver_client::{BlockUnblock, mgmt_api}; +use routerify::Middleware; use tokio_util::sync::CancellationToken; use utils::auth::{Scope, SwappableJwtAuth}; use utils::id::{NodeId, TenantId, TimelineId}; -use pageserver_api::controller_api::{ - NodeAvailability, NodeConfigureRequest, NodeRegisterRequest, TenantPolicyRequest, - 
TenantShardMigrateRequest, +use crate::http; +use crate::metrics::{ + HttpRequestLatencyLabelGroup, HttpRequestStatusLabelGroup, METRICS_REGISTRY, + PageserverRequestLabelGroup, }; -use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; - -use control_plane::storage_controller::{AttachHookRequest, InspectRequest}; - -use routerify::Middleware; +use crate::persistence::SafekeeperUpsert; +use crate::reconciler::ReconcileError; +use crate::service::{LeadershipStatus, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIMEOUT, Service}; /// State available to HTTP request handlers pub struct HttpState { @@ -1455,8 +1448,8 @@ pub fn prologue_leadership_status_check_middleware< }) } -fn prologue_metrics_middleware( -) -> Middleware { +fn prologue_metrics_middleware() +-> Middleware { Middleware::pre(move |req| async move { let meta = RequestMeta { method: req.method().clone(), @@ -1469,8 +1462,8 @@ fn prologue_metrics_middleware }) } -fn epilogue_metrics_middleware( -) -> Middleware { +fn epilogue_metrics_middleware() +-> Middleware { Middleware::post_with_info(move |resp, req_info| async move { let request_name = match req_info.context::() { Some(name) => name, @@ -1621,8 +1614,8 @@ async fn maybe_forward(req: Request) -> ForwardOutcome { Err(err) => { return ForwardOutcome::Forwarded(Err(ApiError::InternalServerError( anyhow::anyhow!( - "Failed to parse leader uri for forwarding while in stepped down state: {err}" - ), + "Failed to parse leader uri for forwarding while in stepped down state: {err}" + ), ))); } }; @@ -2155,8 +2148,23 @@ mod test { #[test] fn test_path_without_ids() { - assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/"); - assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/"); - 
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo"), "/v1/tenant//timeline/"); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788" + ), + "/v1/tenant//timeline/" + ); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788" + ), + "/v1/tenant//timeline/" + ); + assert_eq!( + path_without_ids( + "/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo" + ), + "/v1/tenant//timeline/" + ); } } diff --git a/storage_controller/src/id_lock_map.rs b/storage_controller/src/id_lock_map.rs index 2d8b674f86..6b0c16f0be 100644 --- a/storage_controller/src/id_lock_map.rs +++ b/storage_controller/src/id_lock_map.rs @@ -1,8 +1,7 @@ +use std::collections::HashMap; use std::fmt::Display; -use std::time::Instant; -use std::{collections::HashMap, sync::Arc}; - -use std::time::Duration; +use std::sync::Arc; +use std::time::{Duration, Instant}; use crate::service::RECONCILE_TIMEOUT; diff --git a/storage_controller/src/leadership.rs b/storage_controller/src/leadership.rs index 5fae8991ec..5e1d6f3ec9 100644 --- a/storage_controller/src/leadership.rs +++ b/storage_controller/src/leadership.rs @@ -3,11 +3,9 @@ use std::sync::Arc; use hyper::Uri; use tokio_util::sync::CancellationToken; -use crate::{ - peer_client::{GlobalObservedState, PeerClient}, - persistence::{ControllerPersistence, DatabaseError, DatabaseResult, Persistence}, - service::Config, -}; +use crate::peer_client::{GlobalObservedState, PeerClient}; +use crate::persistence::{ControllerPersistence, DatabaseError, DatabaseResult, Persistence}; +use crate::service::Config; /// Helper for storage controller leadership acquisition pub(crate) struct Leadership { @@ -91,7 +89,9 @@ impl Leadership { // Special case: if this is a brand new storage controller, 
migrations will not // have run at this point yet, and, hence, the controllers table does not exist. // Detect this case via the error string (diesel doesn't type it) and allow it. - tracing::info!("Detected first storage controller start-up. Allowing missing controllers table ..."); + tracing::info!( + "Detected first storage controller start-up. Allowing missing controllers table ..." + ); return Ok(None); } } diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 4152e40a76..04dd3bb3f6 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -1,26 +1,26 @@ -use anyhow::{anyhow, Context}; -use clap::Parser; -use hyper0::Uri; -use metrics::launch_timestamp::LaunchTimestamp; -use metrics::BuildInfo; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; + +use anyhow::{Context, anyhow}; +use clap::Parser; +use hyper0::Uri; +use metrics::BuildInfo; +use metrics::launch_timestamp::LaunchTimestamp; use storage_controller::http::make_router; use storage_controller::metrics::preinitialize_metrics; use storage_controller::persistence::Persistence; use storage_controller::service::chaos_injector::ChaosInjector; use storage_controller::service::{ - Config, Service, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT, + Config, HEARTBEAT_INTERVAL_DEFAULT, LONG_RECONCILE_THRESHOLD_DEFAULT, MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT, - PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, + PRIORITY_RECONCILER_CONCURRENCY_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, Service, }; use tokio::signal::unix::SignalKind; use tokio_util::sync::CancellationToken; use tracing::Instrument; use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::logging::{self, LogFormat}; - use utils::sentry_init::init_sentry; use utils::{project_build_tag, project_git_version, tcp_listener}; @@ -34,7 +34,7 @@ static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; /// This 
adds roughly 3% overhead for allocations on average, which is acceptable considering /// performance-sensitive code will avoid allocations as far as possible anyway. #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:21\0"; #[derive(Parser)] @@ -297,8 +297,8 @@ async fn async_main() -> anyhow::Result<()> { // Production systems should always have secrets configured: if public_key was not set // then we would implicitly disable auth. anyhow::bail!( - "Insecure config! One or more secrets is not set. This is only permitted in `--dev` mode" - ); + "Insecure config! One or more secrets is not set. This is only permitted in `--dev` mode" + ); } StrictMode::Strict if args.compute_hook_url.is_none() => { // Production systems should always have a compute hook set, to prevent falling diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs index 6d67e0d130..f490edb68f 100644 --- a/storage_controller/src/metrics.rs +++ b/storage_controller/src/metrics.rs @@ -7,17 +7,18 @@ //! //! The rest of the code defines label group types and deals with converting outer types to labels. //! 
+use std::sync::Mutex; + use bytes::Bytes; -use measured::{label::LabelValue, metric::histogram, FixedCardinalityLabel, MetricGroup}; +use measured::label::LabelValue; +use measured::metric::histogram; +use measured::{FixedCardinalityLabel, MetricGroup}; use metrics::NeonMetrics; use once_cell::sync::Lazy; -use std::sync::Mutex; use strum::IntoEnumIterator; -use crate::{ - persistence::{DatabaseError, DatabaseOperation}, - service::LeadershipStatus, -}; +use crate::persistence::{DatabaseError, DatabaseOperation}; +use crate::service::LeadershipStatus; pub(crate) static METRICS_REGISTRY: Lazy = Lazy::new(StorageControllerMetrics::default); diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index 3762d13c10..bc7fe8802a 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -1,22 +1,22 @@ -use std::{str::FromStr, time::Duration}; +use std::str::FromStr; +use std::time::Duration; use anyhow::anyhow; -use pageserver_api::{ - controller_api::{ - AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, - NodeSchedulingPolicy, TenantLocateResponseShard, - }, - shard::TenantShardId, +use pageserver_api::controller_api::{ + AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, + NodeSchedulingPolicy, TenantLocateResponseShard, }; +use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use reqwest::StatusCode; use serde::Serialize; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::NodeId}; +use utils::backoff; +use utils::id::NodeId; -use crate::{ - pageserver_client::PageserverClient, persistence::NodePersistence, scheduler::MaySchedule, -}; +use crate::pageserver_client::PageserverClient; +use crate::persistence::NodePersistence; +use crate::scheduler::MaySchedule; /// Represents the in-memory description of a Node. 
/// diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs index 645cbdfce1..e9c54414a3 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -1,17 +1,13 @@ -use pageserver_api::{ - models::{ - detach_ancestor::AncestorDetached, LocationConfig, LocationConfigListResponse, - PageserverUtilization, SecondaryProgress, TenantScanRemoteStorageResponse, - TenantShardSplitRequest, TenantShardSplitResponse, TenantWaitLsnRequest, - TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, - TopTenantShardsResponse, - }, - shard::TenantShardId, -}; -use pageserver_client::{ - mgmt_api::{Client, Result}, - BlockUnblock, +use pageserver_api::models::detach_ancestor::AncestorDetached; +use pageserver_api::models::{ + LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress, + TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse, + TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, + TopTenantShardsRequest, TopTenantShardsResponse, }; +use pageserver_api::shard::TenantShardId; +use pageserver_client::BlockUnblock; +use pageserver_client::mgmt_api::{Client, Result}; use reqwest::StatusCode; use utils::id::{NodeId, TenantId, TimelineId}; diff --git a/storage_controller/src/peer_client.rs b/storage_controller/src/peer_client.rs index 1a15bae365..f3f275dee0 100644 --- a/storage_controller/src/peer_client.rs +++ b/storage_controller/src/peer_client.rs @@ -1,16 +1,17 @@ -use crate::tenant_shard::ObservedState; -use pageserver_api::shard::TenantShardId; -use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::error::Error as _; use std::time::Duration; -use tokio_util::sync::CancellationToken; use http_utils::error::HttpErrorBody; use hyper::Uri; +use pageserver_api::shard::TenantShardId; use reqwest::{StatusCode, Url}; +use 
serde::{Deserialize, Serialize}; +use tokio_util::sync::CancellationToken; use utils::backoff; +use crate::tenant_shard::ObservedState; + #[derive(Debug, Clone)] pub(crate) struct PeerClient { uri: Uri, diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 459c11add9..d34da0fef0 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -2,45 +2,38 @@ pub(crate) mod split_state; use std::collections::HashMap; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; -use self::split_state::SplitState; use diesel::prelude::*; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use diesel_async::pooled_connection::bb8::Pool; -use diesel_async::pooled_connection::AsyncDieselConnectionManager; -use diesel_async::pooled_connection::ManagerConfig; -use diesel_async::AsyncPgConnection; -use diesel_async::RunQueryDsl; -use futures::future::BoxFuture; +use diesel_async::pooled_connection::{AsyncDieselConnectionManager, ManagerConfig}; +use diesel_async::{AsyncPgConnection, RunQueryDsl}; +use diesel_migrations::{EmbeddedMigrations, embed_migrations}; use futures::FutureExt; +use futures::future::BoxFuture; use itertools::Itertools; -use pageserver_api::controller_api::AvailabilityZone; -use pageserver_api::controller_api::MetadataHealthRecord; -use pageserver_api::controller_api::SafekeeperDescribeResponse; -use pageserver_api::controller_api::ShardSchedulingPolicy; -use pageserver_api::controller_api::SkSchedulingPolicy; -use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; +use pageserver_api::controller_api::{ + AvailabilityZone, MetadataHealthRecord, NodeSchedulingPolicy, PlacementPolicy, + SafekeeperDescribeResponse, ShardSchedulingPolicy, SkSchedulingPolicy, +}; use pageserver_api::models::TenantConfig; -use pageserver_api::shard::ShardConfigError; -use 
pageserver_api::shard::ShardIdentity; -use pageserver_api::shard::ShardStripeSize; -use pageserver_api::shard::{ShardCount, ShardNumber, TenantShardId}; -use rustls::client::danger::{ServerCertVerified, ServerCertVerifier}; +use pageserver_api::shard::{ + ShardConfigError, ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, +}; use rustls::client::WebPkiServerVerifier; +use rustls::client::danger::{ServerCertVerified, ServerCertVerifier}; use rustls::crypto::ring; use scoped_futures::ScopedBoxFuture; use serde::{Deserialize, Serialize}; use utils::generation::Generation; use utils::id::{NodeId, TenantId}; +use self::split_state::SplitState; use crate::metrics::{ DatabaseQueryErrorLabelGroup, DatabaseQueryLatencyLabelGroup, METRICS_REGISTRY, }; use crate::node::Node; - -use diesel_migrations::{embed_migrations, EmbeddedMigrations}; const MIGRATIONS: EmbeddedMigrations = embed_migrations!("./migrations"); /// ## What do we store? @@ -479,8 +472,7 @@ impl Persistence { &self, shards: Vec, ) -> DatabaseResult<()> { - use crate::schema::metadata_health; - use crate::schema::tenant_shards; + use crate::schema::{metadata_health, tenant_shards}; let now = chrono::Utc::now(); @@ -554,8 +546,7 @@ impl Persistence { &self, input_node_id: NodeId, ) -> DatabaseResult> { - use crate::schema::nodes::dsl::scheduling_policy; - use crate::schema::nodes::dsl::*; + use crate::schema::nodes::dsl::{scheduling_policy, *}; use crate::schema::tenant_shards::dsl::*; let updated = self .with_measured_conn(DatabaseOperation::ReAttach, move |conn| { diff --git a/storage_controller/src/persistence/split_state.rs b/storage_controller/src/persistence/split_state.rs index bce1a75843..f83191038a 100644 --- a/storage_controller/src/persistence/split_state.rs +++ b/storage_controller/src/persistence/split_state.rs @@ -1,8 +1,8 @@ +use diesel::deserialize::{FromSql, FromSqlRow}; +use diesel::expression::AsExpression; use diesel::pg::{Pg, PgValue}; -use diesel::{ - 
deserialize::FromSql, deserialize::FromSqlRow, expression::AsExpression, serialize::ToSql, - sql_types::Int2, -}; +use diesel::serialize::ToSql; +use diesel::sql_types::Int2; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, FromSqlRow, AsExpression)] diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 4f0f170284..a327f6f50f 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -1,6 +1,8 @@ -use crate::pageserver_client::PageserverClient; -use crate::persistence::Persistence; -use crate::{compute_hook, service}; +use std::borrow::Cow; +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; + use json_structural_diff::JsonDiff; use pageserver_api::controller_api::{AvailabilityZone, MigrationConfig, PlacementPolicy}; use pageserver_api::models::{ @@ -9,10 +11,6 @@ use pageserver_api::models::{ use pageserver_api::shard::{ShardIdentity, TenantShardId}; use pageserver_client::mgmt_api; use reqwest::StatusCode; -use std::borrow::Cow; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; use utils::backoff::exponential_backoff; use utils::generation::Generation; @@ -23,7 +21,10 @@ use utils::sync::gate::GateGuard; use crate::compute_hook::{ComputeHook, NotifyError}; use crate::node::Node; +use crate::pageserver_client::PageserverClient; +use crate::persistence::Persistence; use crate::tenant_shard::{IntentState, ObservedState, ObservedStateDelta, ObservedStateLocation}; +use crate::{compute_hook, service}; const DEFAULT_HEATMAP_PERIOD: Duration = Duration::from_secs(60); @@ -511,7 +512,8 @@ impl Reconciler { } else if status == StatusCode::ACCEPTED { let total_runtime = started_at.elapsed(); if total_runtime > total_download_timeout { - tracing::warn!("Timed out after {}ms downloading layers to {node}. 
Progress so far: {}/{} layers, {}/{} bytes", + tracing::warn!( + "Timed out after {}ms downloading layers to {node}. Progress so far: {}/{} layers, {}/{} bytes", total_runtime.as_millis(), progress.layers_downloaded, progress.layers_total, diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs index 53cd8a908b..546fbf0726 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -1,16 +1,17 @@ -use std::{str::FromStr, time::Duration}; +use std::str::FromStr; +use std::time::Duration; use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy}; use reqwest::StatusCode; use safekeeper_client::mgmt_api; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::NodeId, logging::SecretString}; +use utils::backoff; +use utils::id::NodeId; +use utils::logging::SecretString; -use crate::{ - heartbeater::SafekeeperState, - persistence::{DatabaseError, SafekeeperPersistence}, - safekeeper_client::SafekeeperClient, -}; +use crate::heartbeater::SafekeeperState; +use crate::persistence::{DatabaseError, SafekeeperPersistence}; +use crate::safekeeper_client::SafekeeperClient; #[derive(Clone)] pub struct Safekeeper { diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs index f234ab3429..fb5be092a0 100644 --- a/storage_controller/src/safekeeper_client.rs +++ b/storage_controller/src/safekeeper_client.rs @@ -1,13 +1,12 @@ -use crate::metrics::PageserverRequestLabelGroup; use safekeeper_api::models::{ PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, TimelineStatus, }; use safekeeper_client::mgmt_api::{Client, Result}; -use utils::{ - id::{NodeId, TenantId, TimelineId}, - logging::SecretString, -}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::logging::SecretString; + +use crate::metrics::PageserverRequestLabelGroup; /// Thin wrapper around 
[`safekeeper_client::mgmt_api::Client`]. It allows the storage /// controller to collect metrics in a non-intrusive manner. diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index 44936d018a..817cf04fe1 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -1,11 +1,17 @@ -use crate::{metrics::NodeLabelGroup, node::Node, tenant_shard::TenantShard}; +use std::collections::HashMap; +use std::fmt::Debug; + use http_utils::error::ApiError; use itertools::Itertools; -use pageserver_api::{controller_api::AvailabilityZone, models::PageserverUtilization}; +use pageserver_api::controller_api::AvailabilityZone; +use pageserver_api::models::PageserverUtilization; use serde::Serialize; -use std::{collections::HashMap, fmt::Debug}; use utils::id::NodeId; +use crate::metrics::NodeLabelGroup; +use crate::node::Node; +use crate::tenant_shard::TenantShard; + /// Scenarios in which we cannot find a suitable location for a tenant shard #[derive(thiserror::Error, Debug)] pub enum ScheduleError { @@ -775,10 +781,10 @@ impl Scheduler { if !matches!(context.mode, ScheduleMode::Speculative) { tracing::info!( - "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})", - scores.iter().map(|i| i.node_id().0).collect::>(), - preferred_az, - ); + "scheduler selected node {node_id} (elegible nodes {:?}, hard exclude: {hard_exclude:?}, soft exclude: {context:?}, preferred_az: {:?})", + scores.iter().map(|i| i.node_id().0).collect::>(), + preferred_az, + ); } // Note that we do not update shard count here to reflect the scheduling: that @@ -906,14 +912,14 @@ impl Scheduler { #[cfg(test)] pub(crate) mod test_utils { - use crate::node::Node; - use pageserver_api::{ - controller_api::{AvailabilityZone, NodeAvailability}, - models::utilization::test_utilization, - }; use std::collections::HashMap; + + use 
pageserver_api::controller_api::{AvailabilityZone, NodeAvailability}; + use pageserver_api::models::utilization::test_utilization; use utils::id::NodeId; + use crate::node::Node; + /// Test helper: synthesize the requested number of nodes, all in active state. /// /// Node IDs start at one. @@ -951,17 +957,13 @@ pub(crate) mod test_utils { #[cfg(test)] mod tests { - use pageserver_api::{ - controller_api::NodeAvailability, models::utilization::test_utilization, - shard::ShardIdentity, - }; - use utils::{ - id::TenantId, - shard::{ShardCount, ShardNumber, TenantShardId}, - }; + use pageserver_api::controller_api::NodeAvailability; + use pageserver_api::models::utilization::test_utilization; + use pageserver_api::shard::ShardIdentity; + use utils::id::TenantId; + use utils::shard::{ShardCount, ShardNumber, TenantShardId}; use super::*; - use crate::tenant_shard::IntentState; #[test] fn scheduler_basic() -> anyhow::Result<()> { diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index b9c2711192..8671e340bd 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1,112 +1,95 @@ pub mod chaos_injector; mod context_iterator; -use hyper::Uri; -use safekeeper_api::models::SafekeeperUtilization; -use std::{ - borrow::Cow, - cmp::Ordering, - collections::{BTreeMap, HashMap, HashSet}, - error::Error, - ops::Deref, - path::PathBuf, - str::FromStr, - sync::Arc, - time::{Duration, Instant}, -}; +use std::borrow::Cow; +use std::cmp::Ordering; +use std::collections::{BTreeMap, HashMap, HashSet}; +use std::error::Error; +use std::ops::Deref; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::Arc; +use std::time::{Duration, Instant}; -use crate::{ - background_node_operations::{ - Drain, Fill, Operation, OperationError, OperationHandler, MAX_RECONCILES_PER_OPERATION, - }, - compute_hook::{self, NotifyError}, - drain_utils::{self, TenantShardDrain, TenantShardIterator}, - heartbeater::SafekeeperState, - 
id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard}, - leadership::Leadership, - metrics, - peer_client::GlobalObservedState, - persistence::{ - AbortShardSplitStatus, ControllerPersistence, DatabaseResult, MetadataHealthPersistence, - ShardGenerationState, TenantFilter, - }, - reconciler::{ - ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, - ReconcilerPriority, - }, - safekeeper::Safekeeper, - scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode}, - tenant_shard::{ - MigrateAttachment, ObservedStateDelta, ReconcileNeeded, ReconcilerStatus, - ScheduleOptimization, ScheduleOptimizationAction, - }, -}; use anyhow::Context; +use context_iterator::TenantShardContextIterator; use control_plane::storage_controller::{ AttachHookRequest, AttachHookResponse, InspectRequest, InspectResponse, }; use diesel::result::DatabaseErrorKind; -use futures::{stream::FuturesUnordered, StreamExt}; -use itertools::Itertools; -use pageserver_api::{ - controller_api::{ - AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, - NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, - SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest, - ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, - TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, - TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, - TenantShardMigrateResponse, - }, - models::{ - SecondaryProgress, TenantConfigPatchRequest, TenantConfigRequest, - TimelineArchivalConfigRequest, TopTenantShardsRequest, - }, -}; -use reqwest::StatusCode; -use tracing::{instrument, Instrument}; - -use crate::pageserver_client::PageserverClient; +use futures::StreamExt; +use futures::stream::FuturesUnordered; use http_utils::error::ApiError; -use pageserver_api::{ - models::{ - self, 
LocationConfig, LocationConfigListResponse, LocationConfigMode, - PageserverUtilization, ShardParameters, TenantConfig, TenantLocationConfigRequest, - TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest, - TenantShardSplitResponse, TenantTimeTravelRequest, TimelineCreateRequest, TimelineInfo, - }, - shard::{ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId}, - upcall_api::{ - ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, - ValidateResponse, ValidateResponseTenant, - }, +use hyper::Uri; +use itertools::Itertools; +use pageserver_api::controller_api::{ + AvailabilityZone, MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, + NodeRegisterRequest, NodeSchedulingPolicy, NodeShard, NodeShardResponse, PlacementPolicy, + SafekeeperDescribeResponse, ShardSchedulingPolicy, ShardsPreferredAzsRequest, + ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, + TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, + TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, + TenantShardMigrateResponse, }; -use pageserver_client::{mgmt_api, BlockUnblock}; -use tokio::sync::{mpsc::error::TrySendError, TryAcquireError}; +use pageserver_api::models::{ + self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization, + SecondaryProgress, ShardParameters, TenantConfig, TenantConfigPatchRequest, + TenantConfigRequest, TenantLocationConfigRequest, TenantLocationConfigResponse, + TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, + TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, + TopTenantShardsRequest, +}; +use pageserver_api::shard::{ + ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, +}; +use pageserver_api::upcall_api::{ + ReAttachRequest, ReAttachResponse, ReAttachResponseTenant, ValidateRequest, 
ValidateResponse, + ValidateResponseTenant, +}; +use pageserver_client::{BlockUnblock, mgmt_api}; +use reqwest::StatusCode; +use safekeeper_api::models::SafekeeperUtilization; +use tokio::sync::TryAcquireError; +use tokio::sync::mpsc::error::TrySendError; use tokio_util::sync::CancellationToken; -use utils::{ - completion::Barrier, - failpoint_support, - generation::Generation, - id::{NodeId, TenantId, TimelineId}, - pausable_failpoint, - sync::gate::Gate, -}; +use tracing::{Instrument, instrument}; +use utils::completion::Barrier; +use utils::generation::Generation; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::sync::gate::Gate; +use utils::{failpoint_support, pausable_failpoint}; -use crate::{ - compute_hook::ComputeHook, - heartbeater::{Heartbeater, PageserverState}, - node::{AvailabilityTransition, Node}, - persistence::{split_state::SplitState, DatabaseError, Persistence, TenantShardPersistence}, - reconciler::attached_location_conf, - scheduler::Scheduler, - tenant_shard::{ - IntentState, ObservedState, ObservedStateLocation, ReconcileResult, ReconcileWaitError, - ReconcilerWaiter, TenantShard, - }, +use crate::background_node_operations::{ + Drain, Fill, MAX_RECONCILES_PER_OPERATION, Operation, OperationError, OperationHandler, +}; +use crate::compute_hook::{self, ComputeHook, NotifyError}; +use crate::drain_utils::{self, TenantShardDrain, TenantShardIterator}; +use crate::heartbeater::{Heartbeater, PageserverState, SafekeeperState}; +use crate::id_lock_map::{ + IdLockMap, TracingExclusiveGuard, trace_exclusive_lock, trace_shared_lock, +}; +use crate::leadership::Leadership; +use crate::metrics; +use crate::node::{AvailabilityTransition, Node}; +use crate::pageserver_client::PageserverClient; +use crate::peer_client::GlobalObservedState; +use crate::persistence::split_state::SplitState; +use crate::persistence::{ + AbortShardSplitStatus, ControllerPersistence, DatabaseError, DatabaseResult, + MetadataHealthPersistence, Persistence, 
ShardGenerationState, TenantFilter, + TenantShardPersistence, +}; +use crate::reconciler::{ + ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, ReconcilerPriority, + attached_location_conf, +}; +use crate::safekeeper::Safekeeper; +use crate::scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode, Scheduler}; +use crate::tenant_shard::{ + IntentState, MigrateAttachment, ObservedState, ObservedStateDelta, ObservedStateLocation, + ReconcileNeeded, ReconcileResult, ReconcileWaitError, ReconcilerStatus, ReconcilerWaiter, + ScheduleOptimization, ScheduleOptimizationAction, TenantShard, }; - -use context_iterator::TenantShardContextIterator; const WAITER_FILL_DRAIN_POLL_TIMEOUT: Duration = Duration::from_millis(500); @@ -787,7 +770,9 @@ impl Service { }); } - tracing::info!("Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)"); + tracing::info!( + "Startup complete, spawned {reconcile_tasks} reconciliation tasks ({shard_count} shards total)" + ); } async fn initial_heartbeat_round<'a>( @@ -1182,7 +1167,9 @@ impl Service { let mut safekeepers = (*locked.safekeepers).clone(); for (id, state) in deltas.0 { let Some(sk) = safekeepers.get_mut(&id) else { - tracing::info!("Couldn't update safekeeper safekeeper state for id {id} from heartbeat={state:?}"); + tracing::info!( + "Couldn't update safekeeper safekeeper state for id {id} from heartbeat={state:?}" + ); continue; }; sk.set_availability(state); @@ -1537,7 +1524,9 @@ impl Service { // If a node was removed before being completely drained, it is legal for it to leave behind a `generation_pageserver` referring // to a non-existent node, because node deletion doesn't block on completing the reconciliations that will issue new generations // on different pageservers. 
- tracing::warn!("Tenant shard {tenant_shard_id} references non-existent node {generation_pageserver} in database, will be rescheduled"); + tracing::warn!( + "Tenant shard {tenant_shard_id} references non-existent node {generation_pageserver} in database, will be rescheduled" + ); } } let new_tenant = TenantShard::from_persistent(tsp, intent)?; @@ -1867,7 +1856,7 @@ impl Service { } Ok(AttachHookResponse { - gen: attach_req + r#gen: attach_req .node_id .map(|_| tenant_shard.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap()), }) @@ -2039,7 +2028,7 @@ impl Service { let new_gen = *new_gen; response.tenants.push(ReAttachResponseTenant { id: *tenant_shard_id, - gen: Some(new_gen.into().unwrap()), + r#gen: Some(new_gen.into().unwrap()), // A tenant is only put into multi or stale modes in the middle of a [`Reconciler::live_migrate`] // execution. If a pageserver is restarted during that process, then the reconcile pass will // fail, and start from scratch, so it doesn't make sense for us to try and preserve @@ -2076,7 +2065,7 @@ impl Service { response.tenants.push(ReAttachResponseTenant { id: *tenant_shard_id, - gen: None, + r#gen: None, mode: LocationConfigMode::Secondary, }); @@ -2138,15 +2127,19 @@ impl Service { let locked = self.inner.read().unwrap(); for req_tenant in validate_req.tenants { if let Some(tenant_shard) = locked.tenants.get(&req_tenant.id) { - let valid = tenant_shard.generation == Some(Generation::new(req_tenant.gen)); + let valid = tenant_shard.generation == Some(Generation::new(req_tenant.r#gen)); tracing::info!( "handle_validate: {}(gen {}): valid={valid} (latest {:?})", req_tenant.id, - req_tenant.gen, + req_tenant.r#gen, tenant_shard.generation ); - in_memory_result.push((req_tenant.id, Generation::new(req_tenant.gen), valid)); + in_memory_result.push(( + req_tenant.id, + Generation::new(req_tenant.r#gen), + valid, + )); } else { // This is legal: for example during a shard split 
the pageserver may still // have deletions in its queue from the old pre-split shard, or after deletion @@ -2165,13 +2158,11 @@ impl Service { // in case of controller split-brain, where some other controller process might have incremented the generation. let db_generations = self .persistence - .shard_generations(in_memory_result.iter().filter_map(|i| { - if i.2 { - Some(&i.0) - } else { - None - } - })) + .shard_generations( + in_memory_result + .iter() + .filter_map(|i| if i.2 { Some(&i.0) } else { None }), + ) .await?; let db_generations = db_generations.into_iter().collect::>(); @@ -2323,7 +2314,9 @@ impl Service { // Unique key violation: this is probably a retry. Because the shard count is part of the unique key, // if we see a unique key violation it means that the creation request's shard count matches the previous // creation's shard count. - tracing::info!("Tenant shards already present in database, proceeding with idempotent creation..."); + tracing::info!( + "Tenant shards already present in database, proceeding with idempotent creation..." + ); } // Any other database error is unexpected and a bug. 
Err(e) => return Err(ApiError::InternalServerError(anyhow::anyhow!(e))), @@ -3004,7 +2997,7 @@ impl Service { None => { return Err(ApiError::NotFound( anyhow::anyhow!("Tenant not found").into(), - )) + )); } } }; @@ -3071,7 +3064,9 @@ impl Service { }) .find(|(_, _, mode)| *mode != LocationConfigMode::Detached); if let Some((node_id, _observed_location, mode)) = maybe_attached { - return Err(ApiError::InternalServerError(anyhow::anyhow!("We observed attached={mode:?} tenant in node_id={node_id} shard with tenant_shard_id={shard_id}"))); + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "We observed attached={mode:?} tenant in node_id={node_id} shard with tenant_shard_id={shard_id}" + ))); } } let scheduler = &mut locked.scheduler; @@ -3944,7 +3939,9 @@ impl Service { // This can only happen if there is a split brain controller modifying the database. This should // never happen when testing, and if it happens in production we can only log the issue. debug_assert!(false); - tracing::error!("Shard {shard_id} not found in generation state! Is another rogue controller running?"); + tracing::error!( + "Shard {shard_id} not found in generation state! Is another rogue controller running?" + ); continue; }; let (generation, generation_pageserver) = generation; @@ -3953,13 +3950,17 @@ impl Service { // This is legitimate only in a very narrow window where the shard was only just configured into // Attached mode after being created in Secondary or Detached mode, and it has had its generation // set but not yet had a Reconciler run (reconciler is the only thing that sets generation_pageserver). - tracing::warn!("Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?"); + tracing::warn!( + "Shard {shard_id} generation is set ({generation:?}) but generation_pageserver is None, reconciler not run yet?" 
+ ); } } else { // This should never happen: a shard with no generation is only permitted when it was created in some state // other than PlacementPolicy::Attached (and generation is always written to DB before setting Attached in memory) debug_assert!(false); - tracing::error!("Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!"); + tracing::error!( + "Shard {shard_id} generation is None, but it is in PlacementPolicy::Attached mode!" + ); continue; } } @@ -4492,13 +4493,17 @@ impl Service { // if the original attachment location is offline. if let Some(node_id) = shard.intent.get_attached() { if !nodes.get(node_id).unwrap().is_available() { - tracing::info!("Demoting attached intent for {tenant_shard_id} on unavailable node {node_id}"); + tracing::info!( + "Demoting attached intent for {tenant_shard_id} on unavailable node {node_id}" + ); shard.intent.demote_attached(scheduler, *node_id); } } for node_id in shard.intent.get_secondary().clone() { if !nodes.get(&node_id).unwrap().is_available() { - tracing::info!("Dropping secondary intent for {tenant_shard_id} on unavailable node {node_id}"); + tracing::info!( + "Dropping secondary intent for {tenant_shard_id} on unavailable node {node_id}" + ); shard.intent.remove_secondary(scheduler, node_id); } } @@ -4526,7 +4531,9 @@ impl Service { // rely on the reconciliation that happens when a node transitions to Active to clean up. Since we have // removed child shards from our in-memory state and database, the reconciliation will implicitly remove // them from the node. - tracing::warn!("Node {node} unavailable, can't clean up during split abort. It will be cleaned up when it is reactivated."); + tracing::warn!( + "Node {node} unavailable, can't clean up during split abort. It will be cleaned up when it is reactivated." + ); continue; } @@ -4971,7 +4978,10 @@ impl Service { // applies the new stripe size to the children. 
let mut shard_ident = shard_ident.unwrap(); if shard_ident.count.count() > 1 && shard_ident.stripe_size != new_stripe_size { - return Err(ApiError::BadRequest(anyhow::anyhow!("Attempted to change stripe size ({:?}->{new_stripe_size:?}) on a tenant with multiple shards", shard_ident.stripe_size))); + return Err(ApiError::BadRequest(anyhow::anyhow!( + "Attempted to change stripe size ({:?}->{new_stripe_size:?}) on a tenant with multiple shards", + shard_ident.stripe_size + ))); } shard_ident.stripe_size = new_stripe_size; @@ -5226,8 +5236,11 @@ impl Service { ) .await { - tracing::warn!("Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})", - child_id, child_ps); + tracing::warn!( + "Failed to update compute of {}->{} during split, proceeding anyway to complete split ({e})", + child_id, + child_ps + ); failed_notifications.push(child_id); } } @@ -5283,9 +5296,13 @@ impl Service { match shard.policy { PlacementPolicy::Attached(n) => { // If our new attached node was a secondary, it no longer should be. 
- shard.intent.remove_secondary(scheduler, migrate_req.node_id); + shard + .intent + .remove_secondary(scheduler, migrate_req.node_id); - shard.intent.set_attached(scheduler, Some(migrate_req.node_id)); + shard + .intent + .set_attached(scheduler, Some(migrate_req.node_id)); // If we were already attached to something, demote that to a secondary if let Some(old_attached) = old_attached { @@ -5306,7 +5323,7 @@ impl Service { PlacementPolicy::Detached => { return Err(ApiError::BadRequest(anyhow::anyhow!( "Cannot migrate a tenant that is PlacementPolicy::Detached: configure it to an attached policy first" - ))) + ))); } } @@ -5367,7 +5384,9 @@ impl Service { shard.intent ); } else if shard.intent.get_attached() == &Some(migrate_req.node_id) { - tracing::info!("Migrating secondary to {node}: already attached where we were asked to create a secondary"); + tracing::info!( + "Migrating secondary to {node}: already attached where we were asked to create a secondary" + ); } else { let old_secondaries = shard.intent.get_secondary().clone(); for secondary in old_secondaries { @@ -5880,7 +5899,7 @@ impl Service { return Err(ApiError::InternalServerError(anyhow::anyhow!( "{} attached as primary+secondary on the same node", tid - ))) + ))); } (true, false) => Some(false), (false, true) => Some(true), @@ -6923,12 +6942,16 @@ impl Service { // Check that maybe_optimizable doesn't disagree with the actual optimization functions. // Only do this in testing builds because it is not a correctness-critical check, so we shouldn't // panic in prod if we hit this, or spend cycles on it in prod. 
- assert!(shard - .optimize_attachment(scheduler, &schedule_context) - .is_none()); - assert!(shard - .optimize_secondary(scheduler, &schedule_context) - .is_none()); + assert!( + shard + .optimize_attachment(scheduler, &schedule_context) + .is_none() + ); + assert!( + shard + .optimize_secondary(scheduler, &schedule_context) + .is_none() + ); } continue; } @@ -6984,7 +7007,9 @@ impl Service { } Some(node) => { if !node.is_available() { - tracing::info!("Skipping optimization migration of {tenant_shard_id} to {new_attached_node_id} because node unavailable"); + tracing::info!( + "Skipping optimization migration of {tenant_shard_id} to {new_attached_node_id} because node unavailable" + ); } else { // Accumulate optimizations that require fetching secondary status, so that we can execute these // remote API requests concurrently. @@ -7030,7 +7055,9 @@ impl Service { { match secondary_status { Err(e) => { - tracing::info!("Skipping migration of {tenant_shard_id} to {node}, error querying secondary: {e}"); + tracing::info!( + "Skipping migration of {tenant_shard_id} to {node}, error querying secondary: {e}" + ); } Ok(progress) => { // We require secondary locations to have less than 10GiB of downloads pending before we will use @@ -7043,7 +7070,9 @@ impl Service { || progress.bytes_total - progress.bytes_downloaded > DOWNLOAD_FRESHNESS_THRESHOLD { - tracing::info!("Skipping migration of {tenant_shard_id} to {node} because secondary isn't ready: {progress:?}"); + tracing::info!( + "Skipping migration of {tenant_shard_id} to {node} because secondary isn't ready: {progress:?}" + ); #[cfg(feature = "testing")] if progress.heatmap_mtime.is_none() { @@ -7149,14 +7178,18 @@ impl Service { { Some(Err(e)) => { tracing::info!( - "Failed to download heatmap from {secondary_node} for {tenant_shard_id}: {e}" - ); + "Failed to download heatmap from {secondary_node} for {tenant_shard_id}: {e}" + ); } None => { - tracing::info!("Cancelled while downloading heatmap from 
{secondary_node} for {tenant_shard_id}"); + tracing::info!( + "Cancelled while downloading heatmap from {secondary_node} for {tenant_shard_id}" + ); } Some(Ok(progress)) => { - tracing::info!("Successfully downloaded heatmap from {secondary_node} for {tenant_shard_id}: {progress:?}"); + tracing::info!( + "Successfully downloaded heatmap from {secondary_node} for {tenant_shard_id}: {progress:?}" + ); } } } @@ -7241,7 +7274,9 @@ impl Service { // We spawn a task to run this, so it's exactly like some external API client requesting it. We don't // want to block the background reconcile loop on this. - tracing::info!("Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}"); + tracing::info!( + "Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}" + ); let this = self.clone(); tokio::spawn( diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs index 25a0fab5ca..2ff68d7037 100644 --- a/storage_controller/src/service/chaos_injector.rs +++ b/storage_controller/src/service/chaos_injector.rs @@ -1,8 +1,6 @@ -use std::{ - collections::{BTreeMap, HashMap}, - sync::Arc, - time::Duration, -}; +use std::collections::{BTreeMap, HashMap}; +use std::sync::Arc; +use std::time::Duration; use pageserver_api::controller_api::ShardSchedulingPolicy; use rand::seq::SliceRandom; @@ -176,12 +174,19 @@ impl ChaosInjector { let mut victims = Vec::with_capacity(batch_size); if out_of_home_az.len() >= batch_size { - tracing::info!("Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", out_of_home_az.len()); + tracing::info!( + "Injecting chaos: found {batch_size} shards to migrate back to home AZ (total {} out of home AZ)", + out_of_home_az.len() + ); out_of_home_az.shuffle(&mut thread_rng()); victims.extend(out_of_home_az.into_iter().take(batch_size)); } else { - tracing::info!("Injecting chaos: found {} shards to 
migrate back to home AZ, picking {} random shards to migrate", out_of_home_az.len(), std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len())); + tracing::info!( + "Injecting chaos: found {} shards to migrate back to home AZ, picking {} random shards to migrate", + out_of_home_az.len(), + std::cmp::min(batch_size - out_of_home_az.len(), in_home_az.len()) + ); victims.extend(out_of_home_az); in_home_az.shuffle(&mut thread_rng()); diff --git a/storage_controller/src/service/context_iterator.rs b/storage_controller/src/service/context_iterator.rs index dd6913e988..c4784e5e36 100644 --- a/storage_controller/src/service/context_iterator.rs +++ b/storage_controller/src/service/context_iterator.rs @@ -54,17 +54,16 @@ impl<'a> Iterator for TenantShardContextIterator<'a> { #[cfg(test)] mod tests { - use std::{collections::BTreeMap, str::FromStr}; + use std::collections::BTreeMap; + use std::str::FromStr; use pageserver_api::controller_api::PlacementPolicy; use utils::shard::{ShardCount, ShardNumber}; - use crate::{ - scheduler::test_utils::make_test_nodes, service::Scheduler, - tenant_shard::tests::make_test_tenant_with_id, - }; - use super::*; + use crate::scheduler::test_utils::make_test_nodes; + use crate::service::Scheduler; + use crate::tenant_shard::tests::make_test_tenant_with_id; #[test] fn test_context_iterator() { diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 56a36dc2df..34fd244023 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -1,50 +1,39 @@ -use std::{ - collections::{HashMap, HashSet}, - sync::Arc, - time::Duration, -}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; -use crate::{ - metrics::{ - self, ReconcileCompleteLabelGroup, ReconcileLongRunningLabelGroup, ReconcileOutcome, - }, - persistence::TenantShardPersistence, - reconciler::{ReconcileUnits, ReconcilerConfig}, - scheduler::{ - AffinityScore, 
AttachedShardTag, NodeSchedulingScore, NodeSecondarySchedulingScore, - RefCountUpdate, ScheduleContext, SecondaryShardTag, ShardTag, - }, - service::ReconcileResultRequest, -}; use futures::future::{self, Either}; use itertools::Itertools; use pageserver_api::controller_api::{AvailabilityZone, PlacementPolicy, ShardSchedulingPolicy}; -use pageserver_api::{ - models::{LocationConfig, LocationConfigMode, TenantConfig}, - shard::{ShardIdentity, TenantShardId}, -}; +use pageserver_api::models::{LocationConfig, LocationConfigMode, TenantConfig}; +use pageserver_api::shard::{ShardIdentity, TenantShardId}; use serde::{Deserialize, Serialize}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; -use tracing::{instrument, Instrument}; -use utils::{ - generation::Generation, - id::NodeId, - seqwait::{SeqWait, SeqWaitError}, - shard::ShardCount, - sync::gate::GateGuard, -}; +use tracing::{Instrument, instrument}; +use utils::generation::Generation; +use utils::id::NodeId; +use utils::seqwait::{SeqWait, SeqWaitError}; +use utils::shard::ShardCount; +use utils::sync::gate::GateGuard; -use crate::{ - compute_hook::ComputeHook, - node::Node, - persistence::{split_state::SplitState, Persistence}, - reconciler::{ - attached_location_conf, secondary_location_conf, ReconcileError, Reconciler, TargetState, - }, - scheduler::{ScheduleError, Scheduler}, - service, Sequence, +use crate::compute_hook::ComputeHook; +use crate::metrics::{ + self, ReconcileCompleteLabelGroup, ReconcileLongRunningLabelGroup, ReconcileOutcome, }; +use crate::node::Node; +use crate::persistence::split_state::SplitState; +use crate::persistence::{Persistence, TenantShardPersistence}; +use crate::reconciler::{ + ReconcileError, ReconcileUnits, Reconciler, ReconcilerConfig, TargetState, + attached_location_conf, secondary_location_conf, +}; +use crate::scheduler::{ + AffinityScore, AttachedShardTag, NodeSchedulingScore, NodeSecondarySchedulingScore, + RefCountUpdate, ScheduleContext, 
ScheduleError, Scheduler, SecondaryShardTag, ShardTag, +}; +use crate::service::ReconcileResultRequest; +use crate::{Sequence, service}; /// Serialization helper fn read_last_error(v: &std::sync::Mutex>, serializer: S) -> Result @@ -835,7 +824,9 @@ impl TenantShard { let current_score = current_score.for_optimization(); if candidate_score < current_score { - tracing::info!("Found a lower scoring location! {candidate} is better than {current} ({candidate_score:?} is better than {current_score:?})"); + tracing::info!( + "Found a lower scoring location! {candidate} is better than {current} ({candidate_score:?} is better than {current_score:?})" + ); Some(true) } else { // The candidate node is no better than our current location, so don't migrate @@ -1005,7 +996,7 @@ impl TenantShard { // most cases, even if some nodes are offline or have scheduling=pause set. debug_assert!(self.intent.attached.is_some()); // We should not make it here unless attached -- this - // logic presumes we are in a mode where we want secondaries to be in non-home AZ + // logic presumes we are in a mode where we want secondaries to be in non-home AZ if let Some(retain_secondary) = self.intent.get_secondary().iter().find(|n| { let in_home_az = scheduler.get_node_az(n) == self.intent.preferred_az_id; let is_available = secondary_scores @@ -1029,7 +1020,8 @@ impl TenantShard { } // Fall through: we didn't identify one to remove. This ought to be rare. 
- tracing::warn!("Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)", + tracing::warn!( + "Keeping extra secondaries: can't determine which of {:?} to remove (some nodes offline?)", self.intent.get_secondary() ); } else { @@ -1798,8 +1790,8 @@ impl TenantShard { let conf = observed.conf.as_ref()?; match (conf.generation, conf.mode) { - (Some(gen), AttachedMulti | AttachedSingle | AttachedStale) => { - Some((*node_id, gen)) + (Some(gen_), AttachedMulti | AttachedSingle | AttachedStale) => { + Some((*node_id, gen_)) } _ => None, } @@ -1807,7 +1799,7 @@ impl TenantShard { .sorted_by(|(_lhs_node_id, lhs_gen), (_rhs_node_id, rhs_gen)| { lhs_gen.cmp(rhs_gen).reverse() }) - .map(|(node_id, gen)| (node_id, Generation::new(gen))) + .map(|(node_id, gen_)| (node_id, Generation::new(gen_))) .collect() } @@ -1839,7 +1831,10 @@ impl TenantShard { (Some(crnt), Some(new)) if crnt_gen > new_gen => { tracing::warn!( "Skipping observed state update {}: {:?} and using None due to stale generation ({} > {})", - node_id, loc, crnt, new + node_id, + loc, + crnt, + new ); self.observed @@ -1896,18 +1891,17 @@ impl Drop for TenantShard { #[cfg(test)] pub(crate) mod tests { - use std::{cell::RefCell, rc::Rc}; + use std::cell::RefCell; + use std::rc::Rc; - use pageserver_api::{ - controller_api::NodeAvailability, - shard::{ShardCount, ShardNumber}, - }; - use rand::{rngs::StdRng, SeedableRng}; + use pageserver_api::controller_api::NodeAvailability; + use pageserver_api::shard::{ShardCount, ShardNumber}; + use rand::SeedableRng; + use rand::rngs::StdRng; use utils::id::TenantId; - use crate::scheduler::test_utils::make_test_nodes; - use super::*; + use crate::scheduler::test_utils::make_test_nodes; fn make_test_tenant_shard(policy: PlacementPolicy) -> TenantShard { let tenant_id = TenantId::generate(); @@ -2085,16 +2079,20 @@ pub(crate) mod tests { // In pause mode, schedule() shouldn't do anything tenant_shard.scheduling_policy = 
ShardSchedulingPolicy::Pause; - assert!(tenant_shard - .schedule(&mut scheduler, &mut ScheduleContext::default()) - .is_ok()); + assert!( + tenant_shard + .schedule(&mut scheduler, &mut ScheduleContext::default()) + .is_ok() + ); assert!(tenant_shard.intent.all_pageservers().is_empty()); // In active mode, schedule() works tenant_shard.scheduling_policy = ShardSchedulingPolicy::Active; - assert!(tenant_shard - .schedule(&mut scheduler, &mut ScheduleContext::default()) - .is_ok()); + assert!( + tenant_shard + .schedule(&mut scheduler, &mut ScheduleContext::default()) + .is_ok() + ); assert!(!tenant_shard.intent.all_pageservers().is_empty()); tenant_shard.intent.clear(&mut scheduler); @@ -2621,9 +2619,11 @@ pub(crate) mod tests { ); let mut schedule_context = ScheduleContext::default(); for shard in &mut shards { - assert!(shard - .schedule(&mut scheduler, &mut schedule_context) - .is_ok()); + assert!( + shard + .schedule(&mut scheduler, &mut schedule_context) + .is_ok() + ); } // Initial: attached locations land in the tenant's home AZ. 
diff --git a/storage_scrubber/Cargo.toml b/storage_scrubber/Cargo.toml index 609f3bf009..7f6544b894 100644 --- a/storage_scrubber/Cargo.toml +++ b/storage_scrubber/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "storage_scrubber" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index b42709868b..f0ba632fd4 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -1,12 +1,19 @@ use std::collections::{HashMap, HashSet}; use std::time::SystemTime; +use futures_util::StreamExt; use itertools::Itertools; +use pageserver::tenant::IndexPart; use pageserver::tenant::checks::check_valid_layermap; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::remote_timeline_client::manifest::TenantManifest; +use pageserver::tenant::remote_timeline_client::{ + parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, +}; +use pageserver::tenant::storage_layer::LayerName; use pageserver_api::shard::ShardIndex; +use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; use tokio_util::sync::CancellationToken; use tracing::{info, warn}; use utils::generation::Generation; @@ -15,14 +22,7 @@ use utils::shard::TenantShardId; use crate::cloud_admin_api::BranchData; use crate::metadata_stream::stream_listing; -use crate::{download_object_with_retries, RootTarget, TenantShardTimelineId}; -use futures_util::StreamExt; -use pageserver::tenant::remote_timeline_client::{ - parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, -}; -use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; -use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; +use crate::{RootTarget, TenantShardTimelineId, download_object_with_retries}; pub(crate) struct TimelineAnalysis { 
/// Anomalies detected @@ -329,11 +329,11 @@ pub(crate) enum BlobDataParseResult { pub(crate) fn parse_layer_object_name(name: &str) -> Result<(LayerName, Generation), String> { match name.rsplit_once('-') { // FIXME: this is gross, just use a regex? - Some((layer_filename, gen)) if gen.len() == 8 => { + Some((layer_filename, gen_)) if gen_.len() == 8 => { let layer = layer_filename.parse::()?; - let gen = - Generation::parse_suffix(gen).ok_or("Malformed generation suffix".to_string())?; - Ok((layer, gen)) + let gen_ = + Generation::parse_suffix(gen_).ok_or("Malformed generation suffix".to_string())?; + Ok((layer, gen_)) } _ => Ok((name.parse::()?, Generation::none())), } @@ -423,9 +423,9 @@ async fn list_timeline_blobs_impl( tracing::info!("initdb archive preserved {key}"); } Some(maybe_layer_name) => match parse_layer_object_name(maybe_layer_name) { - Ok((new_layer, gen)) => { - tracing::debug!("Parsed layer key: {new_layer} {gen:?}"); - s3_layers.insert((new_layer, gen)); + Ok((new_layer, gen_)) => { + tracing::debug!("Parsed layer key: {new_layer} {gen_:?}"); + s3_layers.insert((new_layer, gen_)); } Err(e) => { tracing::info!("Error parsing {maybe_layer_name} as layer name: {e}"); @@ -465,7 +465,7 @@ async fn list_timeline_blobs_impl( .max_by_key(|i| i.1) .map(|(k, g)| (k.clone(), g)) { - Some((key, gen)) => (Some::(key.to_owned()), gen), + Some((key, gen_)) => (Some::(key.to_owned()), gen_), None => { // Legacy/missing case: one or zero index parts, which did not have a generation (index_part_keys.pop(), Generation::none()) @@ -521,7 +521,7 @@ async fn list_timeline_blobs_impl( }, unused_index_keys: index_part_keys, unknown_keys, - })) + })); } Err(index_parse_error) => errors.push(format!( "index_part.json body parsing error: {index_parse_error}" @@ -631,7 +631,7 @@ pub(crate) async fn list_tenant_manifests( .map(|(g, obj)| (*g, obj.clone())) .unwrap(); - manifests.retain(|(gen, _obj)| gen != &latest_generation); + manifests.retain(|(gen_, _obj)| gen_ != 
&latest_generation); let manifest_bytes = match download_object_with_retries(remote_client, &latest_listing_object.key).await { diff --git a/storage_scrubber/src/cloud_admin_api.rs b/storage_scrubber/src/cloud_admin_api.rs index b1dfe3a53f..5cf286c662 100644 --- a/storage_scrubber/src/cloud_admin_api.rs +++ b/storage_scrubber/src/cloud_admin_api.rs @@ -3,11 +3,9 @@ use std::error::Error as _; use chrono::{DateTime, Utc}; use futures::Future; use hex::FromHex; - -use reqwest::{header, Client, StatusCode, Url}; +use reqwest::{Client, StatusCode, Url, header}; use serde::Deserialize; use tokio::sync::Semaphore; - use tokio_util::sync::CancellationToken; use utils::backoff; use utils::id::{TenantId, TimelineId}; diff --git a/storage_scrubber/src/find_large_objects.rs b/storage_scrubber/src/find_large_objects.rs index 95d3af1453..efb05fb55e 100644 --- a/storage_scrubber/src/find_large_objects.rs +++ b/storage_scrubber/src/find_large_objects.rs @@ -5,10 +5,9 @@ use pageserver::tenant::storage_layer::LayerName; use remote_storage::ListingMode; use serde::{Deserialize, Serialize}; -use crate::{ - checks::parse_layer_object_name, init_remote, metadata_stream::stream_tenants, - stream_objects_with_retries, BucketConfig, NodeKind, -}; +use crate::checks::parse_layer_object_name; +use crate::metadata_stream::stream_tenants; +use crate::{BucketConfig, NodeKind, init_remote, stream_objects_with_retries}; #[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] enum LargeObjectKind { diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index a4e5107e3d..e4f69a1669 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -3,11 +3,9 @@ //! Garbage means S3 objects which are either not referenced by any metadata, //! or are referenced by a control plane tenant/timeline in a deleted state. 
-use std::{ - collections::{HashMap, HashSet}, - sync::Arc, - time::Duration, -}; +use std::collections::{HashMap, HashSet}; +use std::sync::Arc; +use std::time::Duration; use anyhow::Context; use futures_util::TryStreamExt; @@ -16,13 +14,14 @@ use remote_storage::{GenericRemoteStorage, ListingMode, ListingObject, RemotePat use serde::{Deserialize, Serialize}; use tokio_stream::StreamExt; use tokio_util::sync::CancellationToken; -use utils::{backoff, id::TenantId}; +use utils::backoff; +use utils::id::TenantId; +use crate::cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants_maybe_prefix}; use crate::{ - cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}, + BucketConfig, ConsoleConfig, MAX_RETRIES, NodeKind, TenantShardTimelineId, TraversingDepth, init_remote, list_objects_with_retries, - metadata_stream::{stream_tenant_timelines, stream_tenants_maybe_prefix}, - BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth, MAX_RETRIES, }; #[derive(Serialize, Deserialize, Debug)] @@ -259,14 +258,21 @@ async fn find_garbage_inner( .await?; if let Some(object) = tenant_objects.keys.first() { if object.key.get_path().as_str().ends_with("heatmap-v1.json") { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)" + ); garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); continue; } else { - tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key); + tracing::info!( + "Tenant {tenant_shard_id} is missing in console and contains one object: {}", + object.key + ); } } else { - tracing::info!("Tenant {tenant_shard_id} is missing in console appears to have been deleted while we ran"); + tracing::info!( + "Tenant 
{tenant_shard_id} is missing in console appears to have been deleted while we ran" + ); } } else { // A console-unknown tenant with timelines: check if these timelines only contain initdb.tar.zst, from the initial @@ -295,9 +301,13 @@ async fn find_garbage_inner( } if any_non_initdb { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb" + ); } else { - tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb"); + tracing::info!( + "Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb" + ); garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); continue; } @@ -546,7 +556,9 @@ pub async fn purge_garbage( .any(|g| matches!(g.entity, GarbageEntity::Timeline(_))) && garbage_list.active_timeline_count == 0 { - anyhow::bail!("Refusing to purge a garbage list containing garbage timelines that reports 0 active timelines"); + anyhow::bail!( + "Refusing to purge a garbage list containing garbage timelines that reports 0 active timelines" + ); } let filtered_items = garbage_list diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 224235098c..34e43fcc0b 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -17,15 +17,14 @@ use std::time::{Duration, SystemTime}; use anyhow::Context; use aws_config::retry::{RetryConfigBuilder, RetryMode}; +use aws_sdk_s3::Client; use aws_sdk_s3::config::Region; use aws_sdk_s3::error::DisplayErrorContext; -use aws_sdk_s3::Client; - use camino::{Utf8Path, Utf8PathBuf}; use clap::ValueEnum; use futures::{Stream, StreamExt}; -use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_timeline_path}; use pageserver::tenant::TENANTS_SEGMENT_NAME; +use 
pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_timeline_path}; use pageserver_api::shard::TenantShardId; use remote_storage::{ DownloadOpts, GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, @@ -38,7 +37,8 @@ use tokio::io::AsyncReadExt; use tokio_util::sync::CancellationToken; use tracing::{error, warn}; use tracing_appender::non_blocking::WorkerGuard; -use tracing_subscriber::{fmt, prelude::*, EnvFilter}; +use tracing_subscriber::prelude::*; +use tracing_subscriber::{EnvFilter, fmt}; use utils::fs_ext; use utils::id::{TenantId, TenantTimelineId, TimelineId}; @@ -411,10 +411,10 @@ async fn init_remote( let default_prefix = default_prefix_in_bucket(node_kind).to_string(); match &mut storage_config.0.storage { - RemoteStorageKind::AwsS3(ref mut config) => { + RemoteStorageKind::AwsS3(config) => { config.prefix_in_bucket.get_or_insert(default_prefix); } - RemoteStorageKind::AzureContainer(ref mut config) => { + RemoteStorageKind::AzureContainer(config) => { config.prefix_in_container.get_or_insert(default_prefix); } RemoteStorageKind::LocalFs { .. 
} => (), diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index fa6ee90b66..fb2ab02565 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -1,24 +1,20 @@ -use anyhow::{anyhow, bail, Context}; +use anyhow::{Context, anyhow, bail}; use camino::Utf8PathBuf; +use clap::{Parser, Subcommand}; use pageserver_api::controller_api::{MetadataHealthUpdateRequest, MetadataHealthUpdateResponse}; use pageserver_api::shard::TenantShardId; use reqwest::{Method, Url}; use storage_controller_client::control_api; -use storage_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode}; -use storage_scrubber::pageserver_physical_gc::GcMode; +use storage_scrubber::garbage::{PurgeMode, find_garbage, purge_garbage}; +use storage_scrubber::pageserver_physical_gc::{GcMode, pageserver_physical_gc}; use storage_scrubber::scan_pageserver_metadata::scan_pageserver_metadata; -use storage_scrubber::scan_safekeeper_metadata::DatabaseOrList; +use storage_scrubber::scan_safekeeper_metadata::{DatabaseOrList, scan_safekeeper_metadata}; use storage_scrubber::tenant_snapshot::SnapshotDownloader; -use storage_scrubber::{find_large_objects, ControllerClientConfig}; use storage_scrubber::{ - init_logging, pageserver_physical_gc::pageserver_physical_gc, - scan_safekeeper_metadata::scan_safekeeper_metadata, BucketConfig, ConsoleConfig, NodeKind, - TraversingDepth, + BucketConfig, ConsoleConfig, ControllerClientConfig, NodeKind, TraversingDepth, + find_large_objects, init_logging, }; - -use clap::{Parser, Subcommand}; use utils::id::TenantId; - use utils::{project_build_tag, project_git_version}; project_git_version!(GIT_VERSION); @@ -173,15 +169,23 @@ async fn main() -> anyhow::Result<()> { if let NodeKind::Safekeeper = node_kind { let db_or_list = match (timeline_lsns, dump_db_connstr) { (Some(timeline_lsns), _) => { - let timeline_lsns = serde_json::from_str(&timeline_lsns).context("parsing timeline_lsns")?; + let timeline_lsns = 
serde_json::from_str(&timeline_lsns) + .context("parsing timeline_lsns")?; DatabaseOrList::List(timeline_lsns) } (None, Some(dump_db_connstr)) => { - let dump_db_table = dump_db_table.ok_or_else(|| anyhow::anyhow!("dump_db_table not specified"))?; + let dump_db_table = dump_db_table + .ok_or_else(|| anyhow::anyhow!("dump_db_table not specified"))?; let tenant_ids = tenant_ids.iter().map(|tshid| tshid.tenant_id).collect(); - DatabaseOrList::Database { tenant_ids, connstr: dump_db_connstr, table: dump_db_table } + DatabaseOrList::Database { + tenant_ids, + connstr: dump_db_connstr, + table: dump_db_table, + } } - (None, None) => anyhow::bail!("neither `timeline_lsns` specified, nor `dump_db_connstr` and `dump_db_table`"), + (None, None) => anyhow::bail!( + "neither `timeline_lsns` specified, nor `dump_db_connstr` and `dump_db_table`" + ), }; let summary = scan_safekeeper_metadata(bucket_config.clone(), db_or_list).await?; if json { @@ -371,7 +375,9 @@ pub async fn scan_pageserver_metadata_cmd( exit_code: bool, ) -> anyhow::Result<()> { if controller_client.is_none() && post_to_storcon { - return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run")); + return Err(anyhow!( + "Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run" + )); } match scan_pageserver_metadata(bucket_config.clone(), tenant_shard_ids, verbose).await { Err(e) => { diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index 47447d681c..af2407856d 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -1,17 +1,17 @@ use std::str::FromStr; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use async_stream::{stream, try_stream}; use futures::StreamExt; +use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, ListingMode, 
ListingObject, RemotePath}; use tokio_stream::Stream; +use utils::id::{TenantId, TimelineId}; use crate::{ - list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target, - TenantShardTimelineId, + RootTarget, S3Target, TenantShardTimelineId, list_objects_with_retries, + stream_objects_with_retries, }; -use pageserver_api::shard::TenantShardId; -use utils::id::{TenantId, TimelineId}; /// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes pub fn stream_tenants<'a>( diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index 063c6bcfb9..c956b1abbc 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -2,22 +2,16 @@ use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::sync::Arc; use std::time::Duration; -use crate::checks::{ - list_tenant_manifests, list_timeline_blobs, BlobDataParseResult, ListTenantManifestResult, - RemoteTenantManifestInfo, -}; -use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; -use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, MAX_RETRIES}; use async_stream::try_stream; use futures::future::Either; use futures_util::{StreamExt, TryStreamExt}; +use pageserver::tenant::IndexPart; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::remote_timeline_client::manifest::OffloadedTimelineManifest; use pageserver::tenant::remote_timeline_client::{ parse_remote_index_path, parse_remote_tenant_manifest_path, remote_layer_path, }; use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; use pageserver_api::controller_api::TenantDescribeResponse; use pageserver_api::shard::{ShardIndex, TenantShardId}; use remote_storage::{GenericRemoteStorage, ListingObject, RemotePath}; @@ -25,11 +19,18 @@ use reqwest::Method; use serde::Serialize; use 
storage_controller_client::control_api; use tokio_util::sync::CancellationToken; -use tracing::{info_span, Instrument}; +use tracing::{Instrument, info_span}; use utils::backoff; use utils::generation::Generation; use utils::id::{TenantId, TenantTimelineId}; +use crate::checks::{ + BlobDataParseResult, ListTenantManifestResult, RemoteTenantManifestInfo, list_tenant_manifests, + list_timeline_blobs, +}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; +use crate::{BucketConfig, MAX_RETRIES, NodeKind, RootTarget, TenantShardTimelineId, init_remote}; + #[derive(Serialize, Default)] pub struct GcSummary { indices_deleted: usize, diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index a31fb5b242..ba75f25984 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -1,21 +1,22 @@ use std::collections::{HashMap, HashSet}; -use crate::checks::{ - branch_cleanup_and_check_errors, list_timeline_blobs, BlobDataParseResult, - RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis, -}; -use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; -use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimelineId}; use futures_util::{StreamExt, TryStreamExt}; use pageserver::tenant::remote_timeline_client::remote_layer_path; use pageserver_api::controller_api::MetadataHealthUpdateRequest; use pageserver_api::shard::TenantShardId; use remote_storage::GenericRemoteStorage; use serde::Serialize; -use tracing::{info_span, Instrument}; +use tracing::{Instrument, info_span}; use utils::id::TenantId; use utils::shard::ShardCount; +use crate::checks::{ + BlobDataParseResult, RemoteTimelineBlobData, TenantObjectListing, TimelineAnalysis, + branch_cleanup_and_check_errors, list_timeline_blobs, +}; +use crate::metadata_stream::{stream_tenant_timelines, stream_tenants}; +use crate::{BucketConfig, NodeKind, 
RootTarget, TenantShardTimelineId, init_remote}; + #[derive(Serialize, Default)] pub struct MetadataSummary { tenant_count: usize, diff --git a/storage_scrubber/src/scan_safekeeper_metadata.rs b/storage_scrubber/src/scan_safekeeper_metadata.rs index 0a4d4266a0..f10d758097 100644 --- a/storage_scrubber/src/scan_safekeeper_metadata.rs +++ b/storage_scrubber/src/scan_safekeeper_metadata.rs @@ -1,23 +1,24 @@ -use std::{collections::HashSet, str::FromStr, sync::Arc}; +use std::collections::HashSet; +use std::str::FromStr; +use std::sync::Arc; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use futures::stream::{StreamExt, TryStreamExt}; use once_cell::sync::OnceCell; use pageserver_api::shard::TenantShardId; -use postgres_ffi::{XLogFileName, PG_TLI}; +use postgres_ffi::{PG_TLI, XLogFileName}; use remote_storage::GenericRemoteStorage; use rustls::crypto::ring; use serde::Serialize; use tokio_postgres::types::PgLsn; use tracing::{debug, error, info}; -use utils::{ - id::{TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; +use crate::cloud_admin_api::CloudAdminApiClient; +use crate::metadata_stream::stream_listing; use crate::{ - cloud_admin_api::CloudAdminApiClient, init_remote, metadata_stream::stream_listing, - BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId, + BucketConfig, ConsoleConfig, NodeKind, RootTarget, TenantShardTimelineId, init_remote, }; /// Generally we should ask safekeepers, but so far we use everywhere default 16MB. 
diff --git a/storage_scrubber/src/tenant_snapshot.rs b/storage_scrubber/src/tenant_snapshot.rs index 60e79fb859..e17409c20e 100644 --- a/storage_scrubber/src/tenant_snapshot.rs +++ b/storage_scrubber/src/tenant_snapshot.rs @@ -1,25 +1,26 @@ use std::collections::HashMap; use std::sync::Arc; -use crate::checks::{list_timeline_blobs, BlobDataParseResult, RemoteTimelineBlobData}; -use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines}; -use crate::{ - download_object_to_file_s3, init_remote, init_remote_s3, BucketConfig, NodeKind, RootTarget, - TenantShardTimelineId, -}; use anyhow::Context; use async_stream::stream; use aws_sdk_s3::Client; use camino::Utf8PathBuf; use futures::{StreamExt, TryStreamExt}; +use pageserver::tenant::IndexPart; use pageserver::tenant::remote_timeline_client::index::LayerFileMetadata; use pageserver::tenant::storage_layer::LayerName; -use pageserver::tenant::IndexPart; use pageserver_api::shard::TenantShardId; use remote_storage::{GenericRemoteStorage, S3Config}; use utils::generation::Generation; use utils::id::TenantId; +use crate::checks::{BlobDataParseResult, RemoteTimelineBlobData, list_timeline_blobs}; +use crate::metadata_stream::{stream_tenant_shards, stream_tenant_timelines}; +use crate::{ + BucketConfig, NodeKind, RootTarget, TenantShardTimelineId, download_object_to_file_s3, + init_remote, init_remote_s3, +}; + pub struct SnapshotDownloader { s3_client: Arc, s3_root: RootTarget, From 3684162d9fa30535ca52aa570c9a36aafbcdff62 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Wed, 26 Feb 2025 09:19:19 +0000 Subject: [PATCH 019/207] Bump vm-builder v0.37.1 -> v0.42.2 (#10981) Bump version to pick up changes introduced in https://github.com/neondatabase/autoscaling/pull/1286 It's better to have a compute release for this change first, because: - vm-runner changes kernel loglevel from 7 to 6 - vm-builder has a change to bring it back to 7 after startup Previous update: 
https://github.com/neondatabase/neon/pull/10015 --- .github/workflows/build_and_test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1b706b3f16..132f122b7c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -722,14 +722,14 @@ jobs: - pg: v17 debian: bookworm env: - VM_BUILDER_VERSION: v0.37.1 + VM_BUILDER_VERSION: v0.42.2 steps: - uses: actions/checkout@v4 - name: Downloading vm-builder run: | - curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder -o vm-builder + curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-amd64 -o vm-builder chmod +x vm-builder - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193 @@ -750,7 +750,8 @@ jobs: -size=2G \ -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \ -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ + -target-arch=linux/amd64 - name: Pushing vm-compute-node image run: | From 43b109af69678761a507a81397405c23caba3408 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 26 Feb 2025 11:51:07 +0200 Subject: [PATCH 020/207] compute_ctl: Add more detailed tracing spans to startup subroutines (#10979) In local dev environment, these steps take around 100 ms, and they are in the critical path of a compute startup on a compute pool hit. I don't know if it's like that in production, but as first step, add tracing spans to the functions so that they can be measured more easily. 
--- compute_tools/src/compute.rs | 1 + compute_tools/src/disk_quota.rs | 2 ++ compute_tools/src/swap.rs | 3 ++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index d323ea3dcd..6d882ce997 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -546,6 +546,7 @@ impl ComputeNode { // Fast path for sync_safekeepers. If they're already synced we get the lsn // in one roundtrip. If not, we should do a full sync_safekeepers. + #[instrument(skip_all)] pub fn check_safekeepers_synced(&self, compute_state: &ComputeState) -> Result> { let start_time = Utc::now(); diff --git a/compute_tools/src/disk_quota.rs b/compute_tools/src/disk_quota.rs index e838c5b9fd..1353ab938d 100644 --- a/compute_tools/src/disk_quota.rs +++ b/compute_tools/src/disk_quota.rs @@ -1,9 +1,11 @@ use anyhow::Context; +use tracing::instrument; pub const DISK_QUOTA_BIN: &str = "/neonvm/bin/set-disk-quota"; /// If size_bytes is 0, it disables the quota. Otherwise, it sets filesystem quota to size_bytes. /// `fs_mountpoint` should point to the mountpoint of the filesystem where the quota should be set. 
+#[instrument] pub fn set_disk_quota(size_bytes: u64, fs_mountpoint: &str) -> anyhow::Result<()> { let size_kb = size_bytes / 1024; // run `/neonvm/bin/set-disk-quota {size_kb} {mountpoint}` diff --git a/compute_tools/src/swap.rs b/compute_tools/src/swap.rs index 024c5b338e..7ba8cb5503 100644 --- a/compute_tools/src/swap.rs +++ b/compute_tools/src/swap.rs @@ -1,10 +1,11 @@ use std::path::Path; use anyhow::{anyhow, Context}; -use tracing::warn; +use tracing::{instrument, warn}; pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap"; +#[instrument] pub fn resize_swap(size_bytes: u64) -> anyhow::Result<()> { // run `/neonvm/bin/resize-swap --once {size_bytes}` // From e452f2a5a3ea9fde0c7bbaa43b42213f529993ac Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 26 Feb 2025 12:06:42 +0200 Subject: [PATCH 021/207] Remove some redundant log lines at postgres startup (#10958) --- compute_tools/src/spec.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 6f28bd9733..9ca67aba44 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -141,7 +141,6 @@ pub fn get_spec_from_control_plane( /// Check `pg_hba.conf` and update if needed to allow external connections. pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json - info!("checking pg_hba.conf"); let pghba_path = pgdata_path.join("pg_hba.conf"); if config::line_in_file(&pghba_path, PG_HBA_ALL_MD5)? 
{ @@ -156,12 +155,11 @@ pub fn update_pg_hba(pgdata_path: &Path) -> Result<()> { /// Create a standby.signal file pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { // XXX: consider making it a part of spec.json - info!("adding standby.signal"); let signalfile = pgdata_path.join("standby.signal"); if !signalfile.exists() { - info!("created standby.signal"); File::create(signalfile)?; + info!("created standby.signal"); } else { info!("reused pre-existing standby.signal"); } @@ -170,7 +168,6 @@ pub fn add_standby_signal(pgdata_path: &Path) -> Result<()> { #[instrument(skip_all)] pub async fn handle_neon_extension_upgrade(client: &mut Client) -> Result<()> { - info!("handle neon extension upgrade"); let query = "ALTER EXTENSION neon UPDATE"; info!("update neon extension version with query: {}", query); client.simple_query(query).await?; From 40ad42d5568ed60b6f26478511c443a9ea17ffc9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 26 Feb 2025 12:10:05 +0200 Subject: [PATCH 022/207] Silence "sudo: unable to resolve host" messages at compute startup (#10985) --- compute/vm-image-spec-bookworm.yaml | 5 +++++ compute/vm-image-spec-bullseye.yaml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 568f0b0444..ff4c3387d9 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -44,6 +44,11 @@ shutdownHook: | files: - filename: compute_ctl-sudoers content: | + # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all + # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to + # resolve host" log messages that they generate. 
+ Defaults !fqdn + # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD), # regardless of hostname (ALL) diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index 6617c98599..c001040bc9 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -44,6 +44,11 @@ shutdownHook: | files: - filename: compute_ctl-sudoers content: | + # Reverse hostname lookup doesn't currently work, and isn't needed anyway when all + # the rules use ALL as the hostname. Avoid the pointless lookups and the "unable to + # resolve host" log messages that they generate. + Defaults !fqdn + # Allow postgres user (which is what compute_ctl runs as) to run /neonvm/bin/resize-swap # and /neonvm/bin/set-disk-quota as root without requiring entering a password (NOPASSWD), # regardless of hostname (ALL) From 0d36f52a6c48a86a46428c6778391acab1b255c0 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Wed, 26 Feb 2025 12:39:34 +0100 Subject: [PATCH 023/207] proxy: Record and export user-agent header (#10955) neondatabase/cloud#24464 --- proxy/src/context/mod.rs | 14 ++++++++ proxy/src/context/parquet.rs | 51 ++++++++++++++------------- proxy/src/serverless/mod.rs | 8 +++++ proxy/src/serverless/sql_over_http.rs | 7 ++++ 4 files changed, 56 insertions(+), 24 deletions(-) diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 74b48a1bea..f87f4e9ef8 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -55,6 +55,7 @@ struct RequestContextInner { dbname: Option, user: Option, application: Option, + user_agent: Option, error_kind: Option, pub(crate) auth_method: Option, jwt_issuer: Option, @@ -100,6 +101,7 @@ impl Clone for RequestContext { dbname: inner.dbname.clone(), user: inner.user.clone(), application: inner.application.clone(), + user_agent: inner.user_agent.clone(), 
error_kind: inner.error_kind, auth_method: inner.auth_method.clone(), jwt_issuer: inner.jwt_issuer.clone(), @@ -149,6 +151,7 @@ impl RequestContext { dbname: None, user: None, application: None, + user_agent: None, error_kind: None, auth_method: None, jwt_issuer: None, @@ -245,6 +248,13 @@ impl RequestContext { .set_user(user); } + pub(crate) fn set_user_agent(&self, user_agent: Option) { + self.0 + .try_lock() + .expect("should not deadlock") + .set_user_agent(user_agent); + } + pub(crate) fn set_auth_method(&self, auth_method: AuthMethod) { let mut this = self.0.try_lock().expect("should not deadlock"); this.auth_method = Some(auth_method); @@ -384,6 +394,10 @@ impl RequestContextInner { } } + fn set_user_agent(&mut self, user_agent: Option) { + self.user_agent = user_agent; + } + fn set_dbname(&mut self, dbname: DbName) { self.dbname = Some(dbname); } diff --git a/proxy/src/context/parquet.rs b/proxy/src/context/parquet.rs index f029327266..bfab5f34f9 100644 --- a/proxy/src/context/parquet.rs +++ b/proxy/src/context/parquet.rs @@ -82,6 +82,7 @@ pub(crate) struct RequestData { peer_addr: String, username: Option, application_name: Option, + user_agent: Option, endpoint_id: Option, database: Option, project: Option, @@ -128,6 +129,7 @@ impl From<&RequestContextInner> for RequestData { timestamp: value.first_packet.naive_utc(), username: value.user.as_deref().map(String::from), application_name: value.application.as_deref().map(String::from), + user_agent: value.user_agent.as_deref().map(String::from), endpoint_id: value.endpoint_id.as_deref().map(String::from), database: value.dbname.as_deref().map(String::from), project: value.project.as_deref().map(String::from), @@ -522,6 +524,7 @@ mod tests { .unwrap() .naive_utc(), application_name: Some("test".to_owned()), + user_agent: Some("test-user-agent".to_owned()), username: Some(hex::encode(rng.r#gen::<[u8; 4]>())), endpoint_id: Some(hex::encode(rng.r#gen::<[u8; 16]>())), database: Some(hex::encode(rng.r#gen::<[u8; 
16]>())), @@ -610,15 +613,15 @@ mod tests { assert_eq!( file_stats, [ - (1313105, 3, 6000), - (1313094, 3, 6000), - (1313153, 3, 6000), - (1313110, 3, 6000), - (1313246, 3, 6000), - (1313083, 3, 6000), - (1312877, 3, 6000), - (1313112, 3, 6000), - (438020, 1, 2000) + (1313953, 3, 6000), + (1313942, 3, 6000), + (1314001, 3, 6000), + (1313958, 3, 6000), + (1314094, 3, 6000), + (1313931, 3, 6000), + (1313725, 3, 6000), + (1313960, 3, 6000), + (438318, 1, 2000) ] ); @@ -650,11 +653,11 @@ mod tests { assert_eq!( file_stats, [ - (1204324, 5, 10000), - (1204048, 5, 10000), - (1204349, 5, 10000), - (1204334, 5, 10000), - (1204588, 5, 10000) + (1205810, 5, 10000), + (1205534, 5, 10000), + (1205835, 5, 10000), + (1205820, 5, 10000), + (1206074, 5, 10000) ] ); @@ -679,15 +682,15 @@ mod tests { assert_eq!( file_stats, [ - (1313105, 3, 6000), - (1313094, 3, 6000), - (1313153, 3, 6000), - (1313110, 3, 6000), - (1313246, 3, 6000), - (1313083, 3, 6000), - (1312877, 3, 6000), - (1313112, 3, 6000), - (438020, 1, 2000) + (1313953, 3, 6000), + (1313942, 3, 6000), + (1314001, 3, 6000), + (1313958, 3, 6000), + (1314094, 3, 6000), + (1313931, 3, 6000), + (1313725, 3, 6000), + (1313960, 3, 6000), + (438318, 1, 2000) ] ); @@ -724,7 +727,7 @@ mod tests { // files are smaller than the size threshold, but they took too long to fill so were flushed early assert_eq!( file_stats, - [(658014, 2, 3001), (657728, 2, 3000), (657524, 2, 2999)] + [(658584, 2, 3001), (658298, 2, 3000), (658094, 2, 2999)] ); tmpdir.close().unwrap(); diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index dd0fb9c5b4..acd6a05718 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -438,6 +438,14 @@ async fn request_handler( &config.region, ); + ctx.set_user_agent( + request + .headers() + .get(hyper::header::USER_AGENT) + .and_then(|h| h.to_str().ok()) + .map(Into::into), + ); + let span = ctx.span(); info!(parent: &span, "performing websocket upgrade"); diff --git 
a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 8babfb5cd2..93dd531f70 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -228,6 +228,13 @@ fn get_conn_info( } } + ctx.set_user_agent( + headers + .get(hyper::header::USER_AGENT) + .and_then(|h| h.to_str().ok()) + .map(Into::into), + ); + let user_info = ComputeUserInfo { endpoint, user: username, From 26bda175513ee40a1bc9c6264af734182ae1251a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 26 Feb 2025 13:12:50 +0100 Subject: [PATCH 024/207] storcon: use the SchedulingPolicy enum in SafekeeperPersistence (#10897) We don't want to serialize to/from string all the time, so use `SchedulingPolicy` in `SafekeeperPersistence` via the use of a wrapper. Stacked atop #10891 --- storage_controller/src/persistence.rs | 38 ++++++++++++++++++++++----- storage_controller/src/safekeeper.rs | 5 ++-- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index d34da0fef0..2e80b48859 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -4,6 +4,8 @@ use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; +use diesel::deserialize::{FromSql, FromSqlRow}; +use diesel::pg::Pg; use diesel::prelude::*; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use diesel_async::pooled_connection::bb8::Pool; @@ -1556,7 +1558,33 @@ pub(crate) struct SafekeeperPersistence { pub(crate) port: i32, pub(crate) http_port: i32, pub(crate) availability_zone_id: String, - pub(crate) scheduling_policy: String, + pub(crate) scheduling_policy: SkSchedulingPolicyFromSql, +} + +/// Wrapper struct around [`SkSchedulingPolicy`] because both it and [`FromSql`] are from foreign crates, +/// and we don't want to make [`safekeeper_api`] depend on [`diesel`]. 
+#[derive(Serialize, Deserialize, FromSqlRow, Eq, PartialEq, Debug, Copy, Clone)] +pub(crate) struct SkSchedulingPolicyFromSql(pub(crate) SkSchedulingPolicy); + +impl From for SkSchedulingPolicyFromSql { + fn from(value: SkSchedulingPolicy) -> Self { + SkSchedulingPolicyFromSql(value) + } +} + +impl FromSql for SkSchedulingPolicyFromSql { + fn from_sql( + bytes: ::RawValue<'_>, + ) -> diesel::deserialize::Result { + let bytes = bytes.as_bytes(); + match core::str::from_utf8(bytes) { + Ok(s) => match SkSchedulingPolicy::from_str(s) { + Ok(policy) => Ok(SkSchedulingPolicyFromSql(policy)), + Err(e) => Err(format!("can't parse: {e}").into()), + }, + Err(e) => Err(format!("invalid UTF-8 for scheduling policy: {e}").into()), + } + } } impl SafekeeperPersistence { @@ -1572,14 +1600,10 @@ impl SafekeeperPersistence { port: upsert.port, http_port: upsert.http_port, availability_zone_id: upsert.availability_zone_id, - scheduling_policy: String::from(scheduling_policy), + scheduling_policy: SkSchedulingPolicyFromSql(scheduling_policy), } } pub(crate) fn as_describe_response(&self) -> Result { - let scheduling_policy = - SkSchedulingPolicy::from_str(&self.scheduling_policy).map_err(|e| { - DatabaseError::Logical(format!("can't construct SkSchedulingPolicy: {e:?}")) - })?; Ok(SafekeeperDescribeResponse { id: NodeId(self.id as u64), region_id: self.region_id.clone(), @@ -1588,7 +1612,7 @@ impl SafekeeperPersistence { port: self.port, http_port: self.http_port, availability_zone_id: self.availability_zone_id.clone(), - scheduling_policy, + scheduling_policy: self.scheduling_policy.0, }) } } diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs index 546fbf0726..9c7e6e0894 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -1,4 +1,3 @@ -use std::str::FromStr; use std::time::Duration; use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy}; @@ -26,7 +25,7 @@ pub struct 
Safekeeper { impl Safekeeper { pub(crate) fn from_persistence(skp: SafekeeperPersistence, cancel: CancellationToken) -> Self { - let scheduling_policy = SkSchedulingPolicy::from_str(&skp.scheduling_policy).unwrap(); + let scheduling_policy = skp.scheduling_policy.0; Self { cancel, listen_http_addr: skp.host.clone(), @@ -55,7 +54,7 @@ impl Safekeeper { } pub(crate) fn set_scheduling_policy(&mut self, scheduling_policy: SkSchedulingPolicy) { self.scheduling_policy = scheduling_policy; - self.skp.scheduling_policy = String::from(scheduling_policy); + self.skp.scheduling_policy = scheduling_policy.into(); } /// Perform an operation (which is given a [`SafekeeperClient`]) with retries pub(crate) async fn with_client_retries( From 622a9def6f43da479fc7d30160e7d70239c5731b Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 26 Feb 2025 12:47:13 +0000 Subject: [PATCH 025/207] tests: use generated record lsn instead of hardcoded one (#10990) ... and start the initial reader with the correct lsn Closes https://github.com/neondatabase/neon/issues/10978 --- safekeeper/src/send_interpreted_wal.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index be0c849a5f..e196f91d3c 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -1001,17 +1001,22 @@ mod tests { const WAL_READER_BATCH_SIZE: usize = 8192; let start_lsn = Lsn::from_str("0/149FD18").unwrap(); - let shard_0_start_lsn = Lsn::from_str("0/14AFE10").unwrap(); let env = Env::new(true).unwrap(); + let mut next_record_lsns = Vec::default(); let tli = env .make_timeline(NodeId(1), TenantTimelineId::generate(), start_lsn) .await .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) - .await - .unwrap(); + let end_watch = + Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut 
next_record_lsns)) + .await + .unwrap(); + + assert!(next_record_lsns.len() > 3); + let shard_0_start_lsn = next_record_lsns[3]; + let end_pos = end_watch.get(); let streaming_wal_reader = StreamingWalReader::new( @@ -1064,7 +1069,7 @@ mod tests { ); let reader_state = reader.state(); - let mut reader_fut = std::pin::pin!(reader.run(start_lsn, &None)); + let mut reader_fut = std::pin::pin!(reader.run(shard_0_start_lsn, &None)); loop { let poll = futures::poll!(reader_fut.as_mut()); assert!(poll.is_pending()); From c2a768086df3cb9b3eae91708549d4cfd826ba92 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Wed, 26 Feb 2025 13:50:06 +0100 Subject: [PATCH 026/207] add credentials for pulling containers for the jobs (#10987) Ref: https://github.com/neondatabase/cloud/issues/24939 ## Problem I found that we are missing authorization for some container jobs, that will make them use anonymous pulls. It's not an issue for now, with high enough limits, but that could be an issue when new limits introduced in DockerHub (10 pulls / hour) ## Summary of changes - add credentials for the jobs that run in containers --- .github/workflows/benchmarking.yml | 3 +++ .github/workflows/cloud-regress.yml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index b36ac46f35..dd4941bb21 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -398,6 +398,9 @@ jobs: runs-on: ${{ matrix.runner }} container: image: ${{ matrix.image }} + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init # Increase timeout to 8h, default timeout is 6h diff --git a/.github/workflows/cloud-regress.yml b/.github/workflows/cloud-regress.yml index 09d6acd325..606e1c0862 100644 --- a/.github/workflows/cloud-regress.yml +++ b/.github/workflows/cloud-regress.yml @@ -38,6 +38,9 @@ jobs: runs-on: us-east-2 container: image: 
neondatabase/build-tools:pinned-bookworm + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init steps: From f94286f0c929dce82c8903b29688b5bd230b72aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 26 Feb 2025 14:12:26 +0100 Subject: [PATCH 027/207] Upgrade compute_tools and compute_api to edition 2024 (#10983) Updates `compute_tools` and `compute_api` crates to edition 2024. We like to stay on the latest edition if possible. There is no functional changes, however some code changes had to be done to accommodate the edition's breaking changes. The PR has three commits: * the first commit updates the named crates to edition 2024 and appeases `cargo clippy` by changing code. * the second commit performs a `cargo fmt` that does some minor changes (not many) * the third commit performs a cargo fmt with nightly options to reorder imports as a one-time thing. it's completely optional, but I offer it here for the compute team to review it. I'd like to hear opinions about the third commit, if it's wanted and felt worth the diff or not. I think most attention should be put onto the first commit. 
Part of #10918 --- compute_tools/Cargo.toml | 2 +- compute_tools/src/bin/compute_ctl.rs | 29 ++++++----- compute_tools/src/bin/fast_import.rs | 10 ++-- .../src/bin/fast_import/aws_s3_sync.rs | 3 +- compute_tools/src/bin/fast_import/s3_uri.rs | 3 +- compute_tools/src/catalog.rs | 18 +++---- compute_tools/src/checker.rs | 2 +- compute_tools/src/compute.rs | 48 +++++++++---------- compute_tools/src/config.rs | 5 +- compute_tools/src/configurator.rs | 3 +- compute_tools/src/extension_server.rs | 13 +++-- compute_tools/src/http/extract/json.rs | 3 +- compute_tools/src/http/extract/path.rs | 6 ++- compute_tools/src/http/extract/query.rs | 6 ++- compute_tools/src/http/mod.rs | 6 ++- .../src/http/routes/check_writability.rs | 7 ++- compute_tools/src/http/routes/configure.rs | 16 +++---- .../src/http/routes/database_schema.rs | 16 ++++--- .../src/http/routes/dbs_and_roles.rs | 7 ++- .../src/http/routes/extension_server.rs | 16 ++----- compute_tools/src/http/routes/extensions.rs | 16 +++---- compute_tools/src/http/routes/failpoints.rs | 3 +- compute_tools/src/http/routes/grants.rs | 16 +++---- compute_tools/src/http/routes/insights.rs | 6 ++- compute_tools/src/http/routes/metrics.rs | 8 ++-- compute_tools/src/http/routes/metrics_json.rs | 6 ++- compute_tools/src/http/routes/status.rs | 10 ++-- compute_tools/src/http/routes/terminate.rs | 12 ++--- compute_tools/src/http/server.rs | 27 +++++------ compute_tools/src/installed_extensions.rs | 2 +- compute_tools/src/lsn_lease.rs | 20 ++++---- compute_tools/src/metrics.rs | 2 +- compute_tools/src/monitor.rs | 7 +-- compute_tools/src/pg_helpers.rs | 5 +- compute_tools/src/spec.rs | 20 ++++---- compute_tools/src/spec_apply.rs | 15 +++--- compute_tools/src/swap.rs | 2 +- compute_tools/tests/config_test.rs | 2 +- libs/compute_api/Cargo.toml | 2 +- libs/compute_api/src/requests.rs | 9 ++-- libs/compute_api/src/responses.rs | 6 +-- libs/compute_api/src/spec.rs | 8 ++-- 42 files changed, 215 insertions(+), 208 deletions(-) diff --git 
a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index c276996df5..ba2c304141 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "compute_tools" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [features] diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 1cdae718fe..efe707cb7c 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -40,34 +40,33 @@ use std::path::Path; use std::process::exit; use std::str::FromStr; use std::sync::atomic::Ordering; -use std::sync::{mpsc, Arc, Condvar, Mutex, RwLock}; -use std::{thread, time::Duration}; +use std::sync::{Arc, Condvar, Mutex, RwLock, mpsc}; +use std::thread; +use std::time::Duration; use anyhow::{Context, Result}; use chrono::Utc; use clap::Parser; -use compute_tools::disk_quota::set_disk_quota; -use compute_tools::http::server::Server; -use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; -use signal_hook::consts::{SIGQUIT, SIGTERM}; -use signal_hook::{consts::SIGINT, iterator::Signals}; -use tracing::{error, info, warn}; -use url::Url; - use compute_api::responses::{ComputeCtlConfig, ComputeStatus}; use compute_api::spec::ComputeSpec; - use compute_tools::compute::{ - forward_termination_signal, ComputeNode, ComputeState, ParsedSpec, PG_PID, + ComputeNode, ComputeState, PG_PID, ParsedSpec, forward_termination_signal, }; use compute_tools::configurator::launch_configurator; +use compute_tools::disk_quota::set_disk_quota; use compute_tools::extension_server::get_pg_version_string; +use compute_tools::http::server::Server; use compute_tools::logger::*; +use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; use compute_tools::monitor::launch_monitor; use compute_tools::params::*; use compute_tools::spec::*; use compute_tools::swap::resize_swap; -use rlimit::{setrlimit, Resource}; +use rlimit::{Resource, setrlimit}; 
+use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM}; +use signal_hook::iterator::Signals; +use tracing::{error, info, warn}; +use url::Url; use utils::failpoint_support; // this is an arbitrary build tag. Fine as a default / for testing purposes @@ -149,6 +148,8 @@ struct Cli { fn main() -> Result<()> { let cli = Cli::parse(); + let scenario = failpoint_support::init(); + // For historical reasons, the main thread that processes the spec and launches postgres // is synchronous, but we always have this tokio runtime available and we "enter" it so // that you can use tokio::spawn() and tokio::runtime::Handle::current().block_on(...) @@ -160,8 +161,6 @@ fn main() -> Result<()> { let build_tag = runtime.block_on(init())?; - let scenario = failpoint_support::init(); - // enable core dumping for all child processes setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?; diff --git a/compute_tools/src/bin/fast_import.rs b/compute_tools/src/bin/fast_import.rs index 585f3e4e1d..47558be7a0 100644 --- a/compute_tools/src/bin/fast_import.rs +++ b/compute_tools/src/bin/fast_import.rs @@ -25,13 +25,13 @@ //! docker push localhost:3030/localregistry/compute-node-v14:latest //! ``` -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use aws_config::BehaviorVersion; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; -use compute_tools::extension_server::{get_pg_version, PostgresMajorVersion}; +use compute_tools::extension_server::{PostgresMajorVersion, get_pg_version}; use nix::unistd::Pid; -use tracing::{error, info, info_span, warn, Instrument}; +use tracing::{Instrument, error, info, info_span, warn}; use utils::fs_ext::is_directory_empty; #[path = "fast_import/aws_s3_sync.rs"] @@ -558,7 +558,9 @@ async fn cmd_dumprestore( decode_connstring(kms_client.as_ref().unwrap(), &key_id, dest_ciphertext) .await? 
} else { - bail!("destination connection string must be provided in spec for dump_restore command"); + bail!( + "destination connection string must be provided in spec for dump_restore command" + ); }; (source, dest) diff --git a/compute_tools/src/bin/fast_import/aws_s3_sync.rs b/compute_tools/src/bin/fast_import/aws_s3_sync.rs index 1be10b36d6..d8d007da71 100644 --- a/compute_tools/src/bin/fast_import/aws_s3_sync.rs +++ b/compute_tools/src/bin/fast_import/aws_s3_sync.rs @@ -1,11 +1,10 @@ use camino::{Utf8Path, Utf8PathBuf}; use tokio::task::JoinSet; +use tracing::{info, warn}; use walkdir::WalkDir; use super::s3_uri::S3Uri; -use tracing::{info, warn}; - const MAX_PARALLEL_UPLOADS: usize = 10; /// Upload all files from 'local' to 'remote' diff --git a/compute_tools/src/bin/fast_import/s3_uri.rs b/compute_tools/src/bin/fast_import/s3_uri.rs index 52bbef420f..cf4dab7c02 100644 --- a/compute_tools/src/bin/fast_import/s3_uri.rs +++ b/compute_tools/src/bin/fast_import/s3_uri.rs @@ -1,6 +1,7 @@ -use anyhow::Result; use std::str::FromStr; +use anyhow::Result; + /// Struct to hold parsed S3 components #[derive(Debug, Clone, PartialEq, Eq)] pub struct S3Uri { diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 28b10ce21c..2a7f56e6fc 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -1,18 +1,20 @@ +use std::path::Path; +use std::process::Stdio; +use std::result::Result; +use std::sync::Arc; + +use compute_api::responses::CatalogObjects; use futures::Stream; use postgres::NoTls; -use std::{path::Path, process::Stdio, result::Result, sync::Arc}; -use tokio::{ - io::{AsyncBufReadExt, BufReader}, - process::Command, - spawn, -}; +use tokio::io::{AsyncBufReadExt, BufReader}; +use tokio::process::Command; +use tokio::spawn; use tokio_stream::{self as stream, StreamExt}; use tokio_util::codec::{BytesCodec, FramedRead}; use tracing::warn; use crate::compute::ComputeNode; use crate::pg_helpers::{get_existing_dbs_async, 
get_existing_roles_async, postgres_conf_for_db}; -use compute_api::responses::CatalogObjects; pub async fn get_dbs_and_roles(compute: &Arc) -> anyhow::Result { let conf = compute.get_tokio_conn_conf(Some("compute_ctl:get_dbs_and_roles")); @@ -55,7 +57,7 @@ pub enum SchemaDumpError { pub async fn get_database_schema( compute: &Arc, dbname: &str, -) -> Result>, SchemaDumpError> { +) -> Result> + use<>, SchemaDumpError> { let pgbin = &compute.pgbin; let basepath = Path::new(pgbin).parent().unwrap(); let pgdump = basepath.join("pg_dump"); diff --git a/compute_tools/src/checker.rs b/compute_tools/src/checker.rs index 62d61a8bc9..e4207876ac 100644 --- a/compute_tools/src/checker.rs +++ b/compute_tools/src/checker.rs @@ -1,4 +1,4 @@ -use anyhow::{anyhow, Ok, Result}; +use anyhow::{Ok, Result, anyhow}; use tokio_postgres::NoTls; use tracing::{error, instrument, warn}; diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 6d882ce997..e3c70ba622 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,42 +1,37 @@ use std::collections::{HashMap, HashSet}; -use std::env; -use std::fs; use std::iter::once; -use std::os::unix::fs::{symlink, PermissionsExt}; +use std::os::unix::fs::{PermissionsExt, symlink}; use std::path::Path; use std::process::{Command, Stdio}; use std::str::FromStr; -use std::sync::atomic::AtomicU32; -use std::sync::atomic::Ordering; +use std::sync::atomic::{AtomicU32, Ordering}; use std::sync::{Arc, Condvar, Mutex, RwLock}; -use std::time::Duration; -use std::time::Instant; +use std::time::{Duration, Instant}; +use std::{env, fs}; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; -use compute_api::spec::{Database, PgIdent, Role}; +use compute_api::privilege::Privilege; +use compute_api::responses::{ComputeMetrics, ComputeStatus}; +use compute_api::spec::{ + ComputeFeature, ComputeMode, ComputeSpec, Database, ExtVersion, PgIdent, Role, +}; +use futures::StreamExt; use futures::future::join_all; use 
futures::stream::FuturesUnordered; -use futures::StreamExt; +use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; use postgres; -use postgres::error::SqlState; use postgres::NoTls; +use postgres::error::SqlState; +use remote_storage::{DownloadError, RemotePath}; +use tokio::spawn; use tracing::{debug, error, info, instrument, warn}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; - -use compute_api::privilege::Privilege; -use compute_api::responses::{ComputeMetrics, ComputeStatus}; -use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion}; use utils::measured_stream::MeasuredReader; -use nix::sys::signal::{kill, Signal}; -use remote_storage::{DownloadError, RemotePath}; -use tokio::spawn; - use crate::installed_extensions::get_installed_extensions; -use crate::local_proxy; use crate::pg_helpers::*; use crate::spec::*; use crate::spec_apply::ApplySpecPhase::{ @@ -45,13 +40,12 @@ use crate::spec_apply::ApplySpecPhase::{ HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, }; -use crate::spec_apply::PerDatabasePhase; use crate::spec_apply::PerDatabasePhase::{ ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, }; -use crate::spec_apply::{apply_operations, MutableApplyContext, DB}; +use crate::spec_apply::{DB, MutableApplyContext, PerDatabasePhase, apply_operations}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; -use crate::{config, extension_server}; +use crate::{config, extension_server, local_proxy}; pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0); pub static PG_PID: AtomicU32 = AtomicU32::new(0); @@ -1318,7 +1312,7 @@ impl ComputeNode { // Merge-apply spec & changes to PostgreSQL state. 
self.apply_spec_sql(spec.clone(), conf.clone(), max_concurrent_connections)?; - if let Some(ref local_proxy) = &spec.clone().local_proxy_config { + if let Some(local_proxy) = &spec.clone().local_proxy_config { info!("configuring local_proxy"); local_proxy::configure(local_proxy).context("apply_config local_proxy")?; } @@ -1538,7 +1532,9 @@ impl ComputeNode { &postgresql_conf_path, "neon.disable_logical_replication_subscribers=false", )? { - info!("updated postgresql.conf to set neon.disable_logical_replication_subscribers=false"); + info!( + "updated postgresql.conf to set neon.disable_logical_replication_subscribers=false" + ); } self.pg_reload_conf()?; } @@ -1765,7 +1761,9 @@ LIMIT 100", info!("extension already downloaded, skipping re-download"); return Ok(0); } else if start_time_delta < HANG_TIMEOUT && !first_try { - info!("download {ext_archive_name} already started by another process, hanging untill completion or timeout"); + info!( + "download {ext_archive_name} already started by another process, hanging untill completion or timeout" + ); let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(500)); loop { info!("waiting for download"); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index e1bdfffa54..e8056ec7eb 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -4,11 +4,10 @@ use std::io::prelude::*; use std::path::Path; use anyhow::Result; - -use crate::pg_helpers::escape_conf_value; -use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize}; use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption}; +use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value}; + /// Check that `line` is inside a text file and put it there if it is not. /// Create file if it doesn't exist. 
pub fn line_in_file(path: &Path, line: &str) -> Result { diff --git a/compute_tools/src/configurator.rs b/compute_tools/src/configurator.rs index d88f26ca20..d97bd37285 100644 --- a/compute_tools/src/configurator.rs +++ b/compute_tools/src/configurator.rs @@ -1,9 +1,8 @@ use std::sync::Arc; use std::thread; -use tracing::{error, info, instrument}; - use compute_api::responses::ComputeStatus; +use tracing::{error, info, instrument}; use crate::compute::ComputeNode; diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index 00f46386e7..77e98359ab 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -71,15 +71,15 @@ More specifically, here is an example ext_index.json } } */ -use anyhow::Result; -use anyhow::{bail, Context}; +use std::path::Path; +use std::str; + +use anyhow::{Context, Result, bail}; use bytes::Bytes; use compute_api::spec::RemoteExtSpec; use regex::Regex; use remote_storage::*; use reqwest::StatusCode; -use std::path::Path; -use std::str; use tar::Archive; use tracing::info; use tracing::log::warn; @@ -244,7 +244,10 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) { info!("writing file {:?}{:?}", control_path, control_content); std::fs::write(control_path, control_content).unwrap(); } else { - warn!("control file {:?} exists both locally and remotely. ignoring the remote version.", control_path); + warn!( + "control file {:?} exists both locally and remotely. 
ignoring the remote version.", + control_path + ); } } } diff --git a/compute_tools/src/http/extract/json.rs b/compute_tools/src/http/extract/json.rs index 104cc25d5f..1d32e4ff37 100644 --- a/compute_tools/src/http/extract/json.rs +++ b/compute_tools/src/http/extract/json.rs @@ -1,6 +1,7 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::JsonRejection, FromRequest, Request}; +use axum::extract::rejection::JsonRejection; +use axum::extract::{FromRequest, Request}; use compute_api::responses::GenericAPIError; use http::StatusCode; diff --git a/compute_tools/src/http/extract/path.rs b/compute_tools/src/http/extract/path.rs index 09637a96a4..45970cff3d 100644 --- a/compute_tools/src/http/extract/path.rs +++ b/compute_tools/src/http/extract/path.rs @@ -1,8 +1,10 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::PathRejection, FromRequestParts}; +use axum::extract::FromRequestParts; +use axum::extract::rejection::PathRejection; use compute_api::responses::GenericAPIError; -use http::{request::Parts, StatusCode}; +use http::StatusCode; +use http::request::Parts; /// Custom `Path` extractor, so that we can format errors into /// `JsonResponse`. diff --git a/compute_tools/src/http/extract/query.rs b/compute_tools/src/http/extract/query.rs index 9dec3642cf..b8079ea770 100644 --- a/compute_tools/src/http/extract/query.rs +++ b/compute_tools/src/http/extract/query.rs @@ -1,8 +1,10 @@ use std::ops::{Deref, DerefMut}; -use axum::extract::{rejection::QueryRejection, FromRequestParts}; +use axum::extract::FromRequestParts; +use axum::extract::rejection::QueryRejection; use compute_api::responses::GenericAPIError; -use http::{request::Parts, StatusCode}; +use http::StatusCode; +use http::request::Parts; /// Custom `Query` extractor, so that we can format errors into /// `JsonResponse`. 
diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs index 93eb6ef5b7..d182278174 100644 --- a/compute_tools/src/http/mod.rs +++ b/compute_tools/src/http/mod.rs @@ -1,6 +1,8 @@ -use axum::{body::Body, response::Response}; +use axum::body::Body; +use axum::response::Response; use compute_api::responses::{ComputeStatus, GenericAPIError}; -use http::{header::CONTENT_TYPE, StatusCode}; +use http::StatusCode; +use http::header::CONTENT_TYPE; use serde::Serialize; use tracing::error; diff --git a/compute_tools/src/http/routes/check_writability.rs b/compute_tools/src/http/routes/check_writability.rs index d7feb055e9..5a12686fa8 100644 --- a/compute_tools/src/http/routes/check_writability.rs +++ b/compute_tools/src/http/routes/check_writability.rs @@ -1,10 +1,13 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use compute_api::responses::ComputeStatus; use http::StatusCode; -use crate::{checker::check_writability, compute::ComputeNode, http::JsonResponse}; +use crate::checker::check_writability; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Check that the compute is currently running. 
pub(in crate::http) async fn is_writable(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs index 2546cbc344..a2892196b7 100644 --- a/compute_tools/src/http/routes/configure.rs +++ b/compute_tools/src/http/routes/configure.rs @@ -1,18 +1,16 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::ConfigurationRequest, - responses::{ComputeStatus, ComputeStatusResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::ConfigurationRequest; +use compute_api::responses::{ComputeStatus, ComputeStatusResponse}; use http::StatusCode; use tokio::task; use tracing::info; -use crate::{ - compute::{ComputeNode, ParsedSpec}, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::{ComputeNode, ParsedSpec}; +use crate::http::JsonResponse; +use crate::http::extract::Json; // Accept spec in JSON format and request compute configuration. 
If anything // goes wrong after we set the compute status to `ConfigurationPending` and diff --git a/compute_tools/src/http/routes/database_schema.rs b/compute_tools/src/http/routes/database_schema.rs index fd716272dc..1f6ca4b79d 100644 --- a/compute_tools/src/http/routes/database_schema.rs +++ b/compute_tools/src/http/routes/database_schema.rs @@ -1,14 +1,16 @@ use std::sync::Arc; -use axum::{body::Body, extract::State, response::Response}; -use http::{header::CONTENT_TYPE, StatusCode}; +use axum::body::Body; +use axum::extract::State; +use axum::response::Response; +use http::StatusCode; +use http::header::CONTENT_TYPE; use serde::Deserialize; -use crate::{ - catalog::{get_database_schema, SchemaDumpError}, - compute::ComputeNode, - http::{extract::Query, JsonResponse}, -}; +use crate::catalog::{SchemaDumpError, get_database_schema}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Query; #[derive(Debug, Clone, Deserialize)] pub(in crate::http) struct DatabaseSchemaParams { diff --git a/compute_tools/src/http/routes/dbs_and_roles.rs b/compute_tools/src/http/routes/dbs_and_roles.rs index 4843c3fab4..790fe0dfe3 100644 --- a/compute_tools/src/http/routes/dbs_and_roles.rs +++ b/compute_tools/src/http/routes/dbs_and_roles.rs @@ -1,9 +1,12 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use http::StatusCode; -use crate::{catalog::get_dbs_and_roles, compute::ComputeNode, http::JsonResponse}; +use crate::catalog::get_dbs_and_roles; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Get the databases and roles from the compute. 
pub(in crate::http) async fn get_catalog_objects( diff --git a/compute_tools/src/http/routes/extension_server.rs b/compute_tools/src/http/routes/extension_server.rs index 5cc9b6d277..b0265d1e99 100644 --- a/compute_tools/src/http/routes/extension_server.rs +++ b/compute_tools/src/http/routes/extension_server.rs @@ -1,19 +1,13 @@ use std::sync::Arc; -use axum::{ - extract::State, - response::{IntoResponse, Response}, -}; +use axum::extract::State; +use axum::response::{IntoResponse, Response}; use http::StatusCode; use serde::Deserialize; -use crate::{ - compute::ComputeNode, - http::{ - extract::{Path, Query}, - JsonResponse, - }, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::{Path, Query}; #[derive(Debug, Clone, Deserialize)] pub(in crate::http) struct ExtensionServerParams { diff --git a/compute_tools/src/http/routes/extensions.rs b/compute_tools/src/http/routes/extensions.rs index 1fc03b9109..910e1fa155 100644 --- a/compute_tools/src/http/routes/extensions.rs +++ b/compute_tools/src/http/routes/extensions.rs @@ -1,16 +1,14 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::ExtensionInstallRequest, - responses::{ComputeStatus, ExtensionInstallResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::ExtensionInstallRequest; +use compute_api::responses::{ComputeStatus, ExtensionInstallResponse}; use http::StatusCode; -use crate::{ - compute::ComputeNode, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Install a extension. 
pub(in crate::http) async fn install_extension( diff --git a/compute_tools/src/http/routes/failpoints.rs b/compute_tools/src/http/routes/failpoints.rs index 836417d784..8f5da99963 100644 --- a/compute_tools/src/http/routes/failpoints.rs +++ b/compute_tools/src/http/routes/failpoints.rs @@ -17,7 +17,8 @@ pub struct FailpointConfig { pub actions: String, } -use crate::http::{extract::Json, JsonResponse}; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Configure failpoints for testing purposes. pub(in crate::http) async fn configure_failpoints( diff --git a/compute_tools/src/http/routes/grants.rs b/compute_tools/src/http/routes/grants.rs index 3f67f011e5..267dcbb27e 100644 --- a/compute_tools/src/http/routes/grants.rs +++ b/compute_tools/src/http/routes/grants.rs @@ -1,16 +1,14 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; -use compute_api::{ - requests::SetRoleGrantsRequest, - responses::{ComputeStatus, SetRoleGrantsResponse}, -}; +use axum::extract::State; +use axum::response::Response; +use compute_api::requests::SetRoleGrantsRequest; +use compute_api::responses::{ComputeStatus, SetRoleGrantsResponse}; use http::StatusCode; -use crate::{ - compute::ComputeNode, - http::{extract::Json, JsonResponse}, -}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; +use crate::http::extract::Json; /// Add grants for a role. 
pub(in crate::http) async fn add_grant( diff --git a/compute_tools/src/http/routes/insights.rs b/compute_tools/src/http/routes/insights.rs index 6b03a461c3..b1ba67161e 100644 --- a/compute_tools/src/http/routes/insights.rs +++ b/compute_tools/src/http/routes/insights.rs @@ -1,10 +1,12 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use compute_api::responses::ComputeStatus; use http::StatusCode; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Collect current Postgres usage insights. pub(in crate::http) async fn get_insights(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/metrics.rs b/compute_tools/src/http/routes/metrics.rs index 13150a7588..da8d8b20a5 100644 --- a/compute_tools/src/http/routes/metrics.rs +++ b/compute_tools/src/http/routes/metrics.rs @@ -1,10 +1,12 @@ -use axum::{body::Body, response::Response}; -use http::header::CONTENT_TYPE; +use axum::body::Body; +use axum::response::Response; use http::StatusCode; +use http::header::CONTENT_TYPE; use metrics::proto::MetricFamily; use metrics::{Encoder, TextEncoder}; -use crate::{http::JsonResponse, metrics::collect}; +use crate::http::JsonResponse; +use crate::metrics::collect; /// Expose Prometheus metrics. pub(in crate::http) async fn get_metrics() -> Response { diff --git a/compute_tools/src/http/routes/metrics_json.rs b/compute_tools/src/http/routes/metrics_json.rs index 0709db5011..bc35ee2645 100644 --- a/compute_tools/src/http/routes/metrics_json.rs +++ b/compute_tools/src/http/routes/metrics_json.rs @@ -1,9 +1,11 @@ use std::sync::Arc; -use axum::{extract::State, response::Response}; +use axum::extract::State; +use axum::response::Response; use http::StatusCode; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Get startup metrics. 
pub(in crate::http) async fn get_metrics(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/status.rs b/compute_tools/src/http/routes/status.rs index d64d53a58f..8ed1299d6b 100644 --- a/compute_tools/src/http/routes/status.rs +++ b/compute_tools/src/http/routes/status.rs @@ -1,9 +1,13 @@ -use std::{ops::Deref, sync::Arc}; +use std::ops::Deref; +use std::sync::Arc; -use axum::{extract::State, http::StatusCode, response::Response}; +use axum::extract::State; +use axum::http::StatusCode; +use axum::response::Response; use compute_api::responses::ComputeStatusResponse; -use crate::{compute::ComputeNode, http::JsonResponse}; +use crate::compute::ComputeNode; +use crate::http::JsonResponse; /// Retrieve the state of the comute. pub(in crate::http) async fn get_status(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/routes/terminate.rs b/compute_tools/src/http/routes/terminate.rs index 7acd84f236..2c24d4ad6b 100644 --- a/compute_tools/src/http/routes/terminate.rs +++ b/compute_tools/src/http/routes/terminate.rs @@ -1,18 +1,14 @@ use std::sync::Arc; -use axum::{ - extract::State, - response::{IntoResponse, Response}, -}; +use axum::extract::State; +use axum::response::{IntoResponse, Response}; use compute_api::responses::ComputeStatus; use http::StatusCode; use tokio::task; use tracing::info; -use crate::{ - compute::{forward_termination_signal, ComputeNode}, - http::JsonResponse, -}; +use crate::compute::{ComputeNode, forward_termination_signal}; +use crate::http::JsonResponse; /// Terminate the compute. 
pub(in crate::http) async fn terminate(State(compute): State>) -> Response { diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index a523ecd96f..efd18afc78 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -1,23 +1,20 @@ -use std::{ - fmt::Display, - net::{IpAddr, Ipv6Addr, SocketAddr}, - sync::Arc, - time::Duration, -}; +use std::fmt::Display; +use std::net::{IpAddr, Ipv6Addr, SocketAddr}; +use std::sync::Arc; +use std::time::Duration; use anyhow::Result; -use axum::{ - extract::Request, - middleware::{self, Next}, - response::{IntoResponse, Response}, - routing::{get, post}, - Router, -}; +use axum::Router; +use axum::extract::Request; +use axum::middleware::{self, Next}; +use axum::response::{IntoResponse, Response}; +use axum::routing::{get, post}; use http::StatusCode; use tokio::net::TcpListener; use tower::ServiceBuilder; -use tower_http::{request_id::PropagateRequestIdLayer, trace::TraceLayer}; -use tracing::{debug, error, info, Span}; +use tower_http::request_id::PropagateRequestIdLayer; +use tower_http::trace::TraceLayer; +use tracing::{Span, debug, error, info}; use uuid::Uuid; use super::routes::{ diff --git a/compute_tools/src/installed_extensions.rs b/compute_tools/src/installed_extensions.rs index 173dbf40b0..6921505466 100644 --- a/compute_tools/src/installed_extensions.rs +++ b/compute_tools/src/installed_extensions.rs @@ -1,7 +1,7 @@ -use compute_api::responses::{InstalledExtension, InstalledExtensions}; use std::collections::HashMap; use anyhow::Result; +use compute_api::responses::{InstalledExtension, InstalledExtensions}; use postgres::{Client, NoTls}; use crate::metrics::INSTALLED_EXTENSIONS; diff --git a/compute_tools/src/lsn_lease.rs b/compute_tools/src/lsn_lease.rs index 3061d387a5..b4ec675ff4 100644 --- a/compute_tools/src/lsn_lease.rs +++ b/compute_tools/src/lsn_lease.rs @@ -1,17 +1,15 @@ -use anyhow::bail; -use anyhow::Result; -use postgres::{NoTls, SimpleQueryMessage}; 
-use std::time::SystemTime; -use std::{str::FromStr, sync::Arc, thread, time::Duration}; -use utils::id::TenantId; -use utils::id::TimelineId; +use std::str::FromStr; +use std::sync::Arc; +use std::thread; +use std::time::{Duration, SystemTime}; +use anyhow::{Result, bail}; use compute_api::spec::ComputeMode; +use postgres::{NoTls, SimpleQueryMessage}; use tracing::{info, warn}; -use utils::{ - lsn::Lsn, - shard::{ShardCount, ShardNumber, TenantShardId}, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; +use utils::shard::{ShardCount, ShardNumber, TenantShardId}; use crate::compute::ComputeNode; diff --git a/compute_tools/src/metrics.rs b/compute_tools/src/metrics.rs index 870b294d08..bc96e5074c 100644 --- a/compute_tools/src/metrics.rs +++ b/compute_tools/src/metrics.rs @@ -1,6 +1,6 @@ use metrics::core::Collector; use metrics::proto::MetricFamily; -use metrics::{register_int_counter_vec, register_uint_gauge_vec, IntCounterVec, UIntGaugeVec}; +use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec}; use once_cell::sync::Lazy; pub(crate) static INSTALLED_EXTENSIONS: Lazy = Lazy::new(|| { diff --git a/compute_tools/src/monitor.rs b/compute_tools/src/monitor.rs index 184f380a8d..248505e473 100644 --- a/compute_tools/src/monitor.rs +++ b/compute_tools/src/monitor.rs @@ -1,13 +1,14 @@ use std::sync::Arc; -use std::{thread, time::Duration}; +use std::thread; +use std::time::Duration; use chrono::{DateTime, Utc}; +use compute_api::responses::ComputeStatus; +use compute_api::spec::ComputeFeature; use postgres::{Client, NoTls}; use tracing::{debug, error, info, warn}; use crate::compute::ComputeNode; -use compute_api::responses::ComputeStatus; -use compute_api::spec::ComputeFeature; const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500); diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 86fcf99085..5a2e305e1d 100644 --- a/compute_tools/src/pg_helpers.rs +++ 
b/compute_tools/src/pg_helpers.rs @@ -9,7 +9,8 @@ use std::process::Child; use std::str::FromStr; use std::time::{Duration, Instant}; -use anyhow::{bail, Result}; +use anyhow::{Result, bail}; +use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; use futures::StreamExt; use ini::Ini; use notify::{RecursiveMode, Watcher}; @@ -21,8 +22,6 @@ use tokio_postgres; use tokio_postgres::NoTls; use tracing::{debug, error, info, instrument}; -use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; - const POSTGRES_WAIT_TIMEOUT: Duration = Duration::from_millis(60 * 1000); // milliseconds /// Escape a string for including it in a SQL literal. diff --git a/compute_tools/src/spec.rs b/compute_tools/src/spec.rs index 9ca67aba44..1d19f2738d 100644 --- a/compute_tools/src/spec.rs +++ b/compute_tools/src/spec.rs @@ -1,20 +1,20 @@ -use anyhow::{anyhow, bail, Result}; -use reqwest::StatusCode; use std::fs::File; use std::path::Path; -use tokio_postgres::Client; -use tracing::{error, info, instrument, warn}; - -use crate::config; -use crate::metrics::{CPlaneRequestRPC, CPLANE_REQUESTS_TOTAL, UNKNOWN_HTTP_STATUS}; -use crate::migration::MigrationRunner; -use crate::params::PG_HBA_ALL_MD5; -use crate::pg_helpers::*; +use anyhow::{Result, anyhow, bail}; use compute_api::responses::{ ComputeCtlConfig, ControlPlaneComputeStatus, ControlPlaneSpecResponse, }; use compute_api::spec::ComputeSpec; +use reqwest::StatusCode; +use tokio_postgres::Client; +use tracing::{error, info, instrument, warn}; + +use crate::config; +use crate::metrics::{CPLANE_REQUESTS_TOTAL, CPlaneRequestRPC, UNKNOWN_HTTP_STATUS}; +use crate::migration::MigrationRunner; +use crate::params::PG_HBA_ALL_MD5; +use crate::pg_helpers::*; // Do control plane request and return response if any. 
In case of error it // returns a bool flag indicating whether it makes sense to retry the request diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index c4416480d8..b4e084fd91 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -1,18 +1,18 @@ use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Formatter}; use std::future::Future; -use std::iter::empty; -use std::iter::once; +use std::iter::{empty, once}; use std::sync::Arc; -use crate::compute::construct_superuser_query; -use crate::pg_helpers::{escape_literal, DatabaseExt, Escaping, GenericOptionsSearch, RoleExt}; use anyhow::Result; use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role}; use futures::future::join_all; use tokio::sync::RwLock; use tokio_postgres::Client; -use tracing::{debug, info_span, warn, Instrument}; +use tracing::{Instrument, debug, info_span, warn}; + +use crate::compute::construct_superuser_query; +use crate::pg_helpers::{DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal}; #[derive(Clone)] pub enum DB { @@ -474,7 +474,10 @@ async fn get_operations<'a>( let edb = match databases.get(&db.name) { Some(edb) => edb, None => { - warn!("skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL", subphase, db.name); + warn!( + "skipping RunInEachDatabase phase {:?}, database {} doesn't exist in PostgreSQL", + subphase, db.name + ); return Ok(Box::new(empty())); } }; diff --git a/compute_tools/src/swap.rs b/compute_tools/src/swap.rs index 7ba8cb5503..ed27a7cba4 100644 --- a/compute_tools/src/swap.rs +++ b/compute_tools/src/swap.rs @@ -1,6 +1,6 @@ use std::path::Path; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use tracing::{instrument, warn}; pub const RESIZE_SWAP_BIN: &str = "/neonvm/bin/resize-swap"; diff --git a/compute_tools/tests/config_test.rs b/compute_tools/tests/config_test.rs index 9ab16b1930..7b2bff23d5 100644 --- 
a/compute_tools/tests/config_test.rs +++ b/compute_tools/tests/config_test.rs @@ -1,7 +1,7 @@ #[cfg(test)] mod config_tests { - use std::fs::{remove_file, File}; + use std::fs::{File, remove_file}; use std::io::{Read, Write}; use std::path::Path; diff --git a/libs/compute_api/Cargo.toml b/libs/compute_api/Cargo.toml index c11a1b6688..0d1618c1b2 100644 --- a/libs/compute_api/Cargo.toml +++ b/libs/compute_api/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "compute_api" version = "0.1.0" -edition.workspace = true +edition = "2024" license.workspace = true [dependencies] diff --git a/libs/compute_api/src/requests.rs b/libs/compute_api/src/requests.rs index 0c256cae2e..3fbdfcf83f 100644 --- a/libs/compute_api/src/requests.rs +++ b/libs/compute_api/src/requests.rs @@ -1,11 +1,10 @@ //! Structs representing the JSON formats used in the compute_ctl's HTTP API. -use crate::{ - privilege::Privilege, - responses::ComputeCtlConfig, - spec::{ComputeSpec, ExtVersion, PgIdent}, -}; use serde::{Deserialize, Serialize}; +use crate::privilege::Privilege; +use crate::responses::ComputeCtlConfig; +use crate::spec::{ComputeSpec, ExtVersion, PgIdent}; + /// Request of the /configure API /// /// We now pass only `spec` in the configuration request, but later we can diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index a6248019d9..35c580bd37 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -6,10 +6,8 @@ use chrono::{DateTime, Utc}; use jsonwebtoken::jwk::JwkSet; use serde::{Deserialize, Serialize, Serializer}; -use crate::{ - privilege::Privilege, - spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role}, -}; +use crate::privilege::Privilege; +use crate::spec::{ComputeSpec, Database, ExtVersion, PgIdent, Role}; #[derive(Serialize, Debug, Deserialize)] pub struct GenericAPIError { diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 8fffae92fb..d02bfd6814 100644 --- 
a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -5,13 +5,12 @@ //! and connect it to the storage nodes. use std::collections::HashMap; +use regex::Regex; +use remote_storage::RemotePath; use serde::{Deserialize, Serialize}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; -use regex::Regex; -use remote_storage::RemotePath; - /// String type alias representing Postgres identifier and /// intended to be used for DB / role names. pub type PgIdent = String; @@ -339,9 +338,10 @@ pub struct JwksSettings { #[cfg(test)] mod tests { - use super::*; use std::fs::File; + use super::*; + #[test] fn allow_installing_remote_extensions() { let rspec: RemoteExtSpec = serde_json::from_value(serde_json::json!({ From 01581f3af580bdb4f311968b8c2c513a914c8c8a Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Wed, 26 Feb 2025 16:32:37 +0300 Subject: [PATCH 028/207] safekeeper: drop json_ctrl (#10722) ## Problem json_ctrl.rs is an obsolete attempt to have tests with fine control of feeding messages into safekeeper superseded by desim framework. ## Summary of changes Drop it. 
--- safekeeper/src/handler.rs | 13 -- safekeeper/src/json_ctrl.rs | 192 ----------------------- safekeeper/src/lib.rs | 1 - test_runner/fixtures/neon_fixtures.py | 27 ---- test_runner/regress/test_wal_acceptor.py | 54 ------- 5 files changed, 287 deletions(-) delete mode 100644 safekeeper/src/json_ctrl.rs diff --git a/safekeeper/src/handler.rs b/safekeeper/src/handler.rs index dd7008c87d..5ca3d1b7c2 100644 --- a/safekeeper/src/handler.rs +++ b/safekeeper/src/handler.rs @@ -23,7 +23,6 @@ use utils::postgres_client::PostgresClientProtocol; use utils::shard::{ShardCount, ShardNumber}; use crate::auth::check_permission; -use crate::json_ctrl::{AppendLogicalMessage, handle_json_ctrl}; use crate::metrics::{PG_QUERIES_GAUGE, TrafficMetrics}; use crate::timeline::TimelineError; use crate::{GlobalTimelines, SafeKeeperConf}; @@ -62,9 +61,6 @@ enum SafekeeperPostgresCommand { }, IdentifySystem, TimelineStatus, - JSONCtrl { - cmd: AppendLogicalMessage, - }, } fn parse_cmd(cmd: &str) -> anyhow::Result { @@ -134,11 +130,6 @@ fn parse_cmd(cmd: &str) -> anyhow::Result { Ok(SafekeeperPostgresCommand::IdentifySystem) } else if cmd.starts_with("TIMELINE_STATUS") { Ok(SafekeeperPostgresCommand::TimelineStatus) - } else if cmd.starts_with("JSON_CTRL") { - let cmd = cmd.strip_prefix("JSON_CTRL").context("invalid prefix")?; - Ok(SafekeeperPostgresCommand::JSONCtrl { - cmd: serde_json::from_str(cmd)?, - }) } else { anyhow::bail!("unsupported command {cmd}"); } @@ -150,7 +141,6 @@ fn cmd_to_string(cmd: &SafekeeperPostgresCommand) -> &str { SafekeeperPostgresCommand::StartReplication { .. } => "START_REPLICATION", SafekeeperPostgresCommand::TimelineStatus => "TIMELINE_STATUS", SafekeeperPostgresCommand::IdentifySystem => "IDENTIFY_SYSTEM", - SafekeeperPostgresCommand::JSONCtrl { .. 
} => "JSON_CTRL", } } @@ -359,9 +349,6 @@ impl postgres_backend::Handler } SafekeeperPostgresCommand::IdentifySystem => self.handle_identify_system(pgb).await, SafekeeperPostgresCommand::TimelineStatus => self.handle_timeline_status(pgb).await, - SafekeeperPostgresCommand::JSONCtrl { ref cmd } => { - handle_json_ctrl(self, pgb, cmd).await - } } }) } diff --git a/safekeeper/src/json_ctrl.rs b/safekeeper/src/json_ctrl.rs deleted file mode 100644 index 793ea9c3e9..0000000000 --- a/safekeeper/src/json_ctrl.rs +++ /dev/null @@ -1,192 +0,0 @@ -//! -//! This module implements JSON_CTRL protocol, which allows exchange -//! JSON messages over psql for testing purposes. -//! -//! Currently supports AppendLogicalMessage, which is used for WAL -//! modifications in tests. -//! - -use anyhow::Context; -use postgres_backend::{PostgresBackend, QueryError}; -use postgres_ffi::{WAL_SEGMENT_SIZE, encode_logical_message}; -use pq_proto::{BeMessage, RowDescriptor, TEXT_OID}; -use safekeeper_api::membership::{Configuration, INVALID_GENERATION}; -use safekeeper_api::{ServerInfo, Term}; -use serde::{Deserialize, Serialize}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tracing::*; -use utils::lsn::Lsn; - -use crate::handler::SafekeeperPostgresHandler; -use crate::safekeeper::{ - AcceptorProposerMessage, AppendRequest, AppendRequestHeader, AppendResponse, - ProposerAcceptorMessage, ProposerElected, TermHistory, TermLsn, -}; -use crate::state::TimelinePersistentState; -use crate::timeline::WalResidentTimeline; - -#[derive(Serialize, Deserialize, Debug)] -pub struct AppendLogicalMessage { - // prefix and message to build LogicalMessage - pub lm_prefix: String, - pub lm_message: String, - - // if true, commit_lsn will match flush_lsn after append - pub set_commit_lsn: bool, - - // if true, ProposerElected will be sent before append - pub send_proposer_elected: bool, - - // fields from AppendRequestHeader - pub term: Term, - #[serde(with = "utils::lsn::serde_as_u64")] - pub epoch_start_lsn: 
Lsn, - #[serde(with = "utils::lsn::serde_as_u64")] - pub begin_lsn: Lsn, - #[serde(with = "utils::lsn::serde_as_u64")] - pub truncate_lsn: Lsn, - pub pg_version: u32, -} - -#[derive(Debug, Serialize)] -struct AppendResult { - // safekeeper state after append - state: TimelinePersistentState, - // info about new record in the WAL - inserted_wal: InsertedWAL, -} - -/// Handles command to craft logical message WAL record with given -/// content, and then append it with specified term and lsn. This -/// function is used to test safekeepers in different scenarios. -pub async fn handle_json_ctrl( - spg: &SafekeeperPostgresHandler, - pgb: &mut PostgresBackend, - append_request: &AppendLogicalMessage, -) -> Result<(), QueryError> { - info!("JSON_CTRL request: {append_request:?}"); - - // need to init safekeeper state before AppendRequest - let tli = prepare_safekeeper(spg, append_request.pg_version).await?; - - // if send_proposer_elected is true, we need to update local history - if append_request.send_proposer_elected { - send_proposer_elected(&tli, append_request.term, append_request.epoch_start_lsn).await?; - } - - let inserted_wal = append_logical_message(&tli, append_request).await?; - let response = AppendResult { - state: tli.get_state().await.1, - inserted_wal, - }; - let response_data = serde_json::to_vec(&response) - .with_context(|| format!("Response {response:?} is not a json array"))?; - - pgb.write_message_noflush(&BeMessage::RowDescription(&[RowDescriptor { - name: b"json", - typoid: TEXT_OID, - typlen: -1, - ..Default::default() - }]))? - .write_message_noflush(&BeMessage::DataRow(&[Some(&response_data)]))? - .write_message_noflush(&BeMessage::CommandComplete(b"JSON_CTRL"))?; - Ok(()) -} - -/// Prepare safekeeper to process append requests without crashes, -/// by sending ProposerGreeting with default server.wal_seg_size. 
-async fn prepare_safekeeper( - spg: &SafekeeperPostgresHandler, - pg_version: u32, -) -> anyhow::Result { - let tli = spg - .global_timelines - .create( - spg.ttid, - Configuration::empty(), - ServerInfo { - pg_version, - wal_seg_size: WAL_SEGMENT_SIZE as u32, - system_id: 0, - }, - Lsn::INVALID, - Lsn::INVALID, - ) - .await?; - - tli.wal_residence_guard().await -} - -async fn send_proposer_elected( - tli: &WalResidentTimeline, - term: Term, - lsn: Lsn, -) -> anyhow::Result<()> { - // add new term to existing history - let history = tli.get_state().await.1.acceptor_state.term_history; - let history = history.up_to(lsn.checked_sub(1u64).unwrap()); - let mut history_entries = history.0; - history_entries.push(TermLsn { term, lsn }); - let history = TermHistory(history_entries); - - let proposer_elected_request = ProposerAcceptorMessage::Elected(ProposerElected { - generation: INVALID_GENERATION, - term, - start_streaming_at: lsn, - term_history: history, - }); - - tli.process_msg(&proposer_elected_request).await?; - Ok(()) -} - -#[derive(Debug, Serialize)] -pub struct InsertedWAL { - begin_lsn: Lsn, - pub end_lsn: Lsn, - append_response: AppendResponse, -} - -/// Extend local WAL with new LogicalMessage record. To do that, -/// create AppendRequest with new WAL and pass it to safekeeper. 
-pub async fn append_logical_message( - tli: &WalResidentTimeline, - msg: &AppendLogicalMessage, -) -> anyhow::Result { - let wal_data = encode_logical_message(&msg.lm_prefix, &msg.lm_message); - let sk_state = tli.get_state().await.1; - - let begin_lsn = msg.begin_lsn; - let end_lsn = begin_lsn + wal_data.len() as u64; - - let commit_lsn = if msg.set_commit_lsn { - end_lsn - } else { - sk_state.commit_lsn - }; - - let append_request = ProposerAcceptorMessage::AppendRequest(AppendRequest { - h: AppendRequestHeader { - generation: INVALID_GENERATION, - term: msg.term, - begin_lsn, - end_lsn, - commit_lsn, - truncate_lsn: msg.truncate_lsn, - }, - wal_data, - }); - - let response = tli.process_msg(&append_request).await?; - - let append_response = match response { - Some(AcceptorProposerMessage::AppendResponse(resp)) => resp, - _ => anyhow::bail!("not AppendResponse"), - }; - - Ok(InsertedWAL { - begin_lsn, - end_lsn, - append_response, - }) -} diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index c52b097066..de3b783508 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -21,7 +21,6 @@ pub mod copy_timeline; pub mod debug_dump; pub mod handler; pub mod http; -pub mod json_ctrl; pub mod metrics; pub mod patch_control_file; pub mod pull_timeline; diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 1d282971b1..5159ad4e3b 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4523,33 +4523,6 @@ class Safekeeper(LogUtils): for na in not_allowed: assert not self.log_contains(na) - def append_logical_message( - self, tenant_id: TenantId, timeline_id: TimelineId, request: dict[str, Any] - ) -> dict[str, Any]: - """ - Send JSON_CTRL query to append LogicalMessage to WAL and modify - safekeeper state. It will construct LogicalMessage from provided - prefix and message, and then will write it to WAL. 
- """ - - # "replication=0" hacks psycopg not to send additional queries - # on startup, see https://github.com/psycopg/psycopg2/pull/482 - token = self.env.auth_keys.generate_tenant_token(tenant_id) - connstr = f"host=localhost port={self.port.pg} password={token} replication=0 options='-c timeline_id={timeline_id} tenant_id={tenant_id}'" - - with closing(psycopg2.connect(connstr)) as conn: - # server doesn't support transactions - conn.autocommit = True - with conn.cursor() as cur: - request_json = json.dumps(request) - log.info(f"JSON_CTRL request on port {self.port.pg}: {request_json}") - cur.execute("JSON_CTRL " + request_json) - all = cur.fetchall() - log.info(f"JSON_CTRL response: {all[0][0]}") - res = json.loads(all[0][0]) - assert isinstance(res, dict) - return res - def http_client( self, auth_token: str | None = None, gen_sk_wide_token: bool = True ) -> SafekeeperHttpClient: diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index c5045fe4a4..fd9edb359b 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -811,60 +811,6 @@ class ProposerPostgres(PgProtocol): self.pg_bin.run(args) -# insert wal in all safekeepers and run sync on proposer -def test_sync_safekeepers( - neon_env_builder: NeonEnvBuilder, - pg_bin: PgBin, - port_distributor: PortDistributor, -): - # We don't really need the full environment for this test, just the - # safekeepers would be enough. 
- neon_env_builder.num_safekeepers = 3 - env = neon_env_builder.init_start() - - tenant_id = TenantId.generate() - timeline_id = TimelineId.generate() - - # write config for proposer - pgdata_dir = os.path.join(env.repo_dir, "proposer_pgdata") - pg = ProposerPostgres( - pgdata_dir, pg_bin, tenant_id, timeline_id, "127.0.0.1", port_distributor.get_port() - ) - pg.create_dir_config(env.get_safekeeper_connstrs()) - - # valid lsn, which is not in the segment start, nor in zero segment - epoch_start_lsn = Lsn("0/16B9188") - begin_lsn = epoch_start_lsn - - # append and commit WAL - lsn_after_append = [] - for i in range(3): - res = env.safekeepers[i].append_logical_message( - tenant_id, - timeline_id, - { - "lm_prefix": "prefix", - "lm_message": "message", - "set_commit_lsn": True, - "send_proposer_elected": True, - "term": 2, - "begin_lsn": int(begin_lsn), - "epoch_start_lsn": int(epoch_start_lsn), - "truncate_lsn": int(epoch_start_lsn), - "pg_version": int(env.pg_version) * 10000, - }, - ) - lsn = Lsn(res["inserted_wal"]["end_lsn"]) - lsn_after_append.append(lsn) - log.info(f"safekeeper[{i}] lsn after append: {lsn}") - - # run sync safekeepers - lsn_after_sync = pg.sync_safekeepers() - log.info(f"lsn after sync = {lsn_after_sync}") - - assert all(lsn_after_sync == lsn for lsn in lsn_after_append) - - @pytest.mark.parametrize("auth_enabled", [False, True]) def test_timeline_status(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled From 86b9703f067a773e4d2bab4d52663ffc0dbaffeb Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 26 Feb 2025 15:36:05 +0100 Subject: [PATCH 029/207] pageserver: set `SO_KEEPALIVE` on the page service socket (#10992) ## Problem If the client connection goes dead without an explicit close (e.g. due to network infrastructure dropping the connection) then we currently won't detect it for a long time, which may e.g. block GetPage flushes and keep the task running. 
Touches https://github.com/neondatabase/cloud/issues/23515. ## Summary of changes Enable `SO_KEEPALIVE` on the page service socket, to enable periodic TCP keepalive probes. These are configured via Linux sysctls, which will be deployed separately. By default, the first probe is sent after 2 hours, so this doesn't have a practical effect until we change the sysctls. --- pageserver/src/bin/pageserver.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index ab8d37df2e..703629aed5 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -14,6 +14,7 @@ use camino::Utf8Path; use clap::{Arg, ArgAction, Command}; use metrics::launch_timestamp::{LaunchTimestamp, set_launch_timestamp_metric}; use metrics::set_build_info_metric; +use nix::sys::socket::{setsockopt, sockopt}; use pageserver::config::{PageServerConf, PageserverIdentity}; use pageserver::controller_upcall_client::ControllerUpcallClient; use pageserver::deletion_queue::DeletionQueue; @@ -347,6 +348,13 @@ fn start_pageserver( info!("Starting pageserver pg protocol handler on {pg_addr}"); let pageserver_listener = tcp_listener::bind(pg_addr)?; + // Enable SO_KEEPALIVE on the socket, to detect dead connections faster. + // These are configured via net.ipv4.tcp_keepalive_* sysctls. + // + // TODO: also set this on the walreceiver socket, but tokio-postgres doesn't + // support enabling keepalives while using the default OS sysctls. + setsockopt(&pageserver_listener, sockopt::KeepAlive, &true)?; + // Launch broker client // The storage_broker::connect call needs to happen inside a tokio runtime thread. 
let broker_client = WALRECEIVER_RUNTIME From 14347630a40ebf60d1869921639ce9d5f7f6ae86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 26 Feb 2025 17:11:15 +0100 Subject: [PATCH 030/207] ancestor detach: delete hardlinked layers on error (#10977) Delete layers that we have hardlinked so far when there is an error in `remote_copy`. This prevents a retry of the ancestor detach from stumbling over already present layer files: the hardlink would fail with an error. If there is a crash, we already clean up during the timeline attach: we loop over all layer files and purge all layers that are not referenced by the `index_part.json`. Make sure to hold the timeline gate to prevent races with detach&attach&read from the layer file. These cleanups aren't completely enough however, as there is code after `prepare` as well. To handle errors there, we add a special case for `AlreadyExists` errors during the hardlink, where we check if the layer is an orphan, and if yes, we delete it from local disk. That is ideally not the case we hit, as it is less clear in that scenario where the layer came from, but it provides good defense in depth. 
Related #10729 Fixes #10970 --- .../src/tenant/timeline/detach_ancestor.rs | 92 +++++++++++++++++-- 1 file changed, 84 insertions(+), 8 deletions(-) diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index c3a7433062..71bd196344 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -12,6 +12,7 @@ use utils::completion; use utils::generation::Generation; use utils::id::TimelineId; use utils::lsn::Lsn; +use utils::sync::gate::GateError; use super::layer_manager::LayerManager; use super::{FlushLayerError, Timeline}; @@ -363,14 +364,25 @@ pub(super) async fn prepare( let mut tasks = tokio::task::JoinSet::new(); let limiter = Arc::new(Semaphore::new(options.copy_concurrency.get())); + let cancel_eval = CancellationToken::new(); for adopted in rest_of_historic { let limiter = limiter.clone(); let timeline = detached.clone(); + let cancel_eval = cancel_eval.clone(); tasks.spawn( async move { - let _permit = limiter.acquire().await; + let _permit = tokio::select! { + permit = limiter.acquire() => { + permit + } + // Wait for the cancellation here instead of letting the entire task be cancelled. + // Cancellations are racy in that they might leave layers on disk. + _ = cancel_eval.cancelled() => { + Err(Error::ShuttingDown)? 
+ } + }; let (owned, did_hardlink) = remote_copy( &adopted, &timeline, @@ -386,7 +398,22 @@ pub(super) async fn prepare( ); } + fn delete_layers(timeline: &Timeline, layers: Vec) -> Result<(), Error> { + // We are deleting layers, so we must hold the gate + let _gate = timeline.gate.enter().map_err(|e| match e { + GateError::GateClosed => Error::ShuttingDown, + })?; + { + layers.into_iter().for_each(|l: Layer| { + l.delete_on_drop(); + std::mem::drop(l); + }); + } + Ok(()) + } + let mut should_fsync = false; + let mut first_err = None; while let Some(res) = tasks.join_next().await { match res { Ok(Ok((owned, did_hardlink))) => { @@ -395,13 +422,24 @@ pub(super) async fn prepare( } new_layers.push(owned); } + + // Don't stop the evaluation on errors, so that we get the full set of hardlinked layers to delete. Ok(Err(failed)) => { - return Err(failed); + cancel_eval.cancel(); + first_err.get_or_insert(failed); + } + Err(je) => { + cancel_eval.cancel(); + first_err.get_or_insert(Error::Prepare(je.into())); } - Err(je) => return Err(Error::Prepare(je.into())), } } + if let Some(failed) = first_err { + delete_layers(detached, new_layers)?; + return Err(failed); + } + // fsync directory again if we hardlinked something if should_fsync { fsync_timeline_dir(detached, ctx).await; @@ -649,6 +687,11 @@ async fn remote_copy( let conf = adoptee.conf; let file_name = adopted.layer_desc().layer_name(); + // We don't want to shut the timeline down during this operation because we do `delete_on_drop` below + let _gate = adoptee.gate.enter().map_err(|e| match e { + GateError::GateClosed => Error::ShuttingDown, + })?; + // depending if Layer::keep_resident, do a hardlink let did_hardlink; let owned = if let Some(adopted_resident) = adopted.keep_resident().await { @@ -660,8 +703,32 @@ async fn remote_copy( &file_name, &metadata.generation, ); - std::fs::hard_link(adopted_path, &adoptee_path) - .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + + match 
std::fs::hard_link(adopted_path, &adoptee_path) { + Ok(()) => {} + Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { + // In theory we should not get into this situation as we are doing cleanups of the layer file after errors. + // However, we don't do cleanups for errors past `prepare`, so there is the slight chance to get to this branch. + + // Double check that the file is orphan (probably from an earlier attempt), then delete it + let key = file_name.clone().into(); + if adoptee.layers.read().await.contains_key(&key) { + // We are supposed to filter out such cases before coming to this function + return Err(Error::Prepare(anyhow::anyhow!( + "layer file {file_name} already present and inside layer map" + ))); + } + tracing::info!("Deleting orphan layer file to make way for hard linking"); + // Delete orphan layer file and try again, to ensure this layer has a well understood source + std::fs::remove_file(&adoptee_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + std::fs::hard_link(adopted_path, &adoptee_path) + .map_err(|e| Error::launder(e.into(), Error::Prepare))?; + } + Err(e) => { + return Err(Error::launder(e.into(), Error::Prepare)); + } + }; did_hardlink = true; Layer::for_resident(conf, adoptee, adoptee_path, file_name, metadata).drop_eviction_guard() } else { @@ -669,12 +736,21 @@ async fn remote_copy( Layer::for_evicted(conf, adoptee, file_name, metadata) }; - let layer = adoptee + let layer = match adoptee .remote_client .copy_timeline_layer(adopted, &owned, cancel) .await - .map(move |()| owned) - .map_err(|e| Error::launder(e, Error::Prepare))?; + { + Ok(()) => owned, + Err(e) => { + { + // Clean up the layer so that on a retry we don't get errors that the file already exists + owned.delete_on_drop(); + std::mem::drop(owned); + } + return Err(Error::launder(e, Error::Prepare)); + } + }; Ok((layer, did_hardlink)) } From a138a6de9b2144d0111c7378b326abe466b69f85 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Wed, 26 Feb 2025 11:09:50 -0600 Subject: [PATCH 031/207] fix(pageserver): correctly handle collect_keyspace errors (#10976) ## Problem ref https://github.com/neondatabase/neon/issues/10927 ## Summary of changes * Implement `is_critical` and `is_cancel` over `CompactionError`. * Revisit all places that uses `CollectKeyspaceError` to ensure they are handled correctly. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 2 + pageserver/src/tenant/tasks.rs | 3 +- pageserver/src/tenant/timeline.rs | 65 +++++++++++++------- pageserver/src/tenant/timeline/compaction.rs | 28 ++++----- 4 files changed, 58 insertions(+), 40 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 71dc3c9075..9243f131ad 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3141,11 +3141,13 @@ impl Tenant { /// Trips the compaction circuit breaker if appropriate. pub(crate) fn maybe_trip_compaction_breaker(&self, err: &CompactionError) { match err { + err if err.is_cancel() => {} CompactionError::ShuttingDown => (), // Offload failures don't trip the circuit breaker, since they're cheap to retry and // shouldn't block compaction. CompactionError::Offload(_) => {} CompactionError::CollectKeySpaceError(err) => { + // CollectKeySpaceError::Cancelled and PageRead::Cancelled are handled in `err.is_cancel` branch. 
self.compaction_circuit_breaker .lock() .unwrap() diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index 670f9ad87f..c90f81889b 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -289,15 +289,14 @@ fn log_compaction_error( ) { use CompactionError::*; - use crate::pgdatadir_mapping::CollectKeySpaceError; use crate::tenant::PageReconstructError; use crate::tenant::upload_queue::NotInitialized; let level = match err { + e if e.is_cancel() => return, ShuttingDown => return, Offload(_) => Level::ERROR, AlreadyRunning(_) => Level::ERROR, - CollectKeySpaceError(CollectKeySpaceError::Cancelled) => Level::INFO, CollectKeySpaceError(_) => Level::ERROR, _ if task_cancelled => Level::INFO, Other(err) => { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index cbbcf5d358..dfa50d498c 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -1864,16 +1864,25 @@ impl Timeline { }; // Signal compaction failure to avoid L0 flush stalls when it's broken. - match result { + match &result { Ok(_) => self.compaction_failed.store(false, AtomicOrdering::Relaxed), - Err(CompactionError::Other(_)) | Err(CompactionError::CollectKeySpaceError(_)) => { + Err(e) if e.is_cancel() => {} + Err(CompactionError::ShuttingDown) => { + // Covered by the `Err(e) if e.is_cancel()` branch. + } + Err(CompactionError::AlreadyRunning(_)) => { + // Covered by the `Err(e) if e.is_cancel()` branch. + } + Err(CompactionError::Other(_)) => { + self.compaction_failed.store(true, AtomicOrdering::Relaxed) + } + Err(CompactionError::CollectKeySpaceError(_)) => { + // Cancelled errors are covered by the `Err(e) if e.is_cancel()` branch. self.compaction_failed.store(true, AtomicOrdering::Relaxed) } // Don't change the current value on offload failure or shutdown. We don't want to // abruptly stall nor resume L0 flushes in these cases. 
Err(CompactionError::Offload(_)) => {} - Err(CompactionError::ShuttingDown) => {} - Err(CompactionError::AlreadyRunning(_)) => {} }; result @@ -4688,10 +4697,7 @@ impl Timeline { )); } - let (dense_ks, sparse_ks) = self - .collect_keyspace(lsn, ctx) - .await - .map_err(CompactionError::CollectKeySpaceError)?; + let (dense_ks, sparse_ks) = self.collect_keyspace(lsn, ctx).await?; let dense_partitioning = dense_ks.partition(&self.shard_identity, partition_size); let sparse_partitioning = SparseKeyPartitioning { parts: vec![sparse_ks], @@ -5417,13 +5423,42 @@ pub(crate) enum CompactionError { Offload(OffloadError), /// Compaction cannot be done right now; page reconstruction and so on. #[error("Failed to collect keyspace: {0}")] - CollectKeySpaceError(CollectKeySpaceError), + CollectKeySpaceError(#[from] CollectKeySpaceError), #[error(transparent)] Other(anyhow::Error), #[error("Compaction already running: {0}")] AlreadyRunning(&'static str), } +impl CompactionError { + /// Errors that can be ignored, i.e., cancel and shutdown. + pub fn is_cancel(&self) -> bool { + matches!( + self, + Self::ShuttingDown + | Self::AlreadyRunning(_) + | Self::CollectKeySpaceError(CollectKeySpaceError::Cancelled) + | Self::CollectKeySpaceError(CollectKeySpaceError::PageRead( + PageReconstructError::Cancelled + )) + | Self::Offload(OffloadError::Cancelled) + ) + } + + /// Critical errors that indicate data corruption. 
+ pub fn is_critical(&self) -> bool { + matches!( + self, + Self::CollectKeySpaceError( + CollectKeySpaceError::Decode(_) + | CollectKeySpaceError::PageRead( + PageReconstructError::MissingKey(_) | PageReconstructError::WalRedo(_), + ) + ) + ) + } +} + impl From for CompactionError { fn from(e: OffloadError) -> Self { match e { @@ -5433,18 +5468,6 @@ impl From for CompactionError { } } -impl From for CompactionError { - fn from(err: CollectKeySpaceError) -> Self { - match err { - CollectKeySpaceError::Cancelled - | CollectKeySpaceError::PageRead(PageReconstructError::Cancelled) => { - CompactionError::ShuttingDown - } - e => CompactionError::Other(e.into()), - } - } -} - impl From for CompactionError { fn from(value: super::upload_queue::NotInitialized) -> Self { match value { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index d221bf53d2..091bd583d7 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -8,6 +8,13 @@ use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; use std::ops::{Deref, Range}; use std::sync::Arc; +use super::layer_manager::LayerManager; +use super::{ + CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder, + GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, RecordedDuration, + Timeline, +}; + use anyhow::{Context, anyhow, bail}; use bytes::Bytes; use enumset::EnumSet; @@ -31,15 +38,8 @@ use utils::critical; use utils::id::TimelineId; use utils::lsn::Lsn; -use super::layer_manager::LayerManager; -use super::{ - CompactFlags, CompactOptions, CompactionError, CreateImageLayersError, DurationRecorder, - GetVectoredError, ImageLayerCreationMode, LastImageLayerCreationStatus, PageReconstructError, - RecordedDuration, Timeline, -}; use crate::context::{AccessStatsBehavior, RequestContext, RequestContextBuilder}; use crate::page_cache; -use 
crate::pgdatadir_mapping::CollectKeySpaceError; use crate::statvfs::Statvfs; use crate::tenant::checks::check_valid_layermap; use crate::tenant::gc_block::GcBlock; @@ -975,18 +975,12 @@ impl Timeline { // Suppress errors when cancelled. Err(_) if self.cancel.is_cancelled() => {} - Err(CompactionError::ShuttingDown) => {} - Err(CompactionError::CollectKeySpaceError(CollectKeySpaceError::Cancelled)) => {} + Err(err) if err.is_cancel() => {} // Alert on critical errors that indicate data corruption. - Err( - err @ CompactionError::CollectKeySpaceError( - CollectKeySpaceError::Decode(_) - | CollectKeySpaceError::PageRead( - PageReconstructError::MissingKey(_) | PageReconstructError::WalRedo(_), - ), - ), - ) => critical!("could not compact, repartitioning keyspace failed: {err:?}"), + Err(err) if err.is_critical() => { + critical!("could not compact, repartitioning keyspace failed: {err:?}"); + } // Log other errors. No partitioning? This is normal, if the timeline was just created // as an empty timeline. Also in unit tests, when we use the timeline as a simple From 8dfa8f0b94d07ac060ffc2ee01ee7aab2f39db30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Wed, 26 Feb 2025 18:17:26 +0100 Subject: [PATCH 032/207] feat(ci): don't build storage on compute-releases and vice versa (#10841) ## Problem Release CI is slow, because we're doing unnecessary work, for example building compute images on storage releases and vice versa. ## Summary of changes - Extract tag generation into reusable workflow and extend it with fetching of previous component releases - Don't build neon images on compute releases and don't build compute images on proxy and storage releases - Reuse images from previous releases for tests on branches where we don't build those images ## Open questions - We differentiate between `TAG` and `COMPUTE_TAG` in a few places, but we don't differentiate between storage and proxy releases. 
Since they use the same image, this will continue to work, but I'm not sure this is what we want. --- .../scripts}/generate_image_maps.py | 0 .github/scripts/previous-releases.jq | 25 ++ .../scripts}/push_with_image_map.py | 0 .github/workflows/_meta.yml | 103 ++++++ .../workflows/_push-to-container-registry.yml | 4 +- .github/workflows/build_and_test.yml | 306 +++++++++--------- .github/workflows/trigger-e2e-tests.yml | 59 ++-- docker-compose/docker-compose.yml | 2 +- docker-compose/test_extensions_upgrade.sh | 12 +- 9 files changed, 317 insertions(+), 194 deletions(-) rename {scripts => .github/scripts}/generate_image_maps.py (100%) create mode 100644 .github/scripts/previous-releases.jq rename {scripts => .github/scripts}/push_with_image_map.py (100%) create mode 100644 .github/workflows/_meta.yml diff --git a/scripts/generate_image_maps.py b/.github/scripts/generate_image_maps.py similarity index 100% rename from scripts/generate_image_maps.py rename to .github/scripts/generate_image_maps.py diff --git a/.github/scripts/previous-releases.jq b/.github/scripts/previous-releases.jq new file mode 100644 index 0000000000..b0b00bce18 --- /dev/null +++ b/.github/scripts/previous-releases.jq @@ -0,0 +1,25 @@ +# Expects response from https://docs.github.com/en/rest/releases/releases?apiVersion=2022-11-28#list-releases as input, +# with tag names `release` for storage, `release-compute` for compute and `release-proxy` for proxy releases. 
+# Extract only the `tag_name` field from each release object +[ .[].tag_name ] + +# Transform each tag name into a structured object using regex capture +| reduce map( + capture("^(?<full>release(-(?<component>proxy|compute))?-(?<version>\\d+))$") + | { + component: (.component // "storage"), # Default to "storage" if no component is specified + version: (.version | tonumber), # Convert the version number to an integer + full: .full # Store the full tag name for final output + } + )[] as $entry # Loop over the transformed list + +# Accumulate the latest (highest-numbered) version for each component +({}; + .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end)) + +# Convert the resulting object into an array of formatted strings +| to_entries +| map("\(.key)=\(.value.full)") + +# Output each string separately +| .[] diff --git a/scripts/push_with_image_map.py b/.github/scripts/push_with_image_map.py similarity index 100% rename from scripts/push_with_image_map.py rename to .github/scripts/push_with_image_map.py diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml new file mode 100644 index 0000000000..ac09a51ae9 --- /dev/null +++ b/.github/workflows/_meta.yml @@ -0,0 +1,103 @@ +name: Generate run metadata +on: + workflow_call: + inputs: + github-event-name: + type: string + required: true + outputs: + build-tag: + description: "Tag for the current workflow run" + value: ${{ jobs.tags.outputs.build-tag }} + previous-storage-release: + description: "Tag of the last storage release" + value: ${{ jobs.tags.outputs.storage }} + previous-proxy-release: + description: "Tag of the last proxy release" + value: ${{ jobs.tags.outputs.proxy }} + previous-compute-release: + description: "Tag of the last compute release" + value: ${{ jobs.tags.outputs.compute }} + run-kind: + description: "The kind of run we're currently in. 
Will be one of `pr-main`, `push-main`, `storage-rc-pr`, `storage-release`, `proxy-rc-pr`, `proxy-release`, `compute-rc-pr`, `compute-release` or `unknown`" + value: ${{ jobs.tags.outputs.run-kind }} + +permissions: {} + +jobs: + tags: + runs-on: ubuntu-22.04 + outputs: + build-tag: ${{ steps.build-tag.outputs.tag }} + compute: ${{ steps.previous-releases.outputs.compute }} + proxy: ${{ steps.previous-releases.outputs.proxy }} + storage: ${{ steps.previous-releases.outputs.storage }} + run-kind: ${{ steps.run-kind.outputs.run-kind }} + permissions: + contents: read + steps: + # Need `fetch-depth: 0` to count the number of commits in the branch + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get run kind + id: run-kind + env: + RUN_KIND: >- + ${{ + false + || (inputs.github-event-name == 'push' && github.ref_name == 'main') && 'push-main' + || (inputs.github-event-name == 'push' && github.ref_name == 'release') && 'storage-release' + || (inputs.github-event-name == 'push' && github.ref_name == 'release-compute') && 'compute-release' + || (inputs.github-event-name == 'push' && github.ref_name == 'release-proxy') && 'proxy-release' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'main') && 'pr-main' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release') && 'storage-rc-pr' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr' + || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr' + || 'unknown' + }} + run: | + echo "run-kind=$RUN_KIND" | tee -a $GITHUB_OUTPUT + + - name: Get build tag + id: build-tag + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }} + CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + RUN_KIND: ${{ steps.run-kind.outputs.run-kind }} + run: | + case $RUN_KIND in + push-main) + echo "tag=$(git rev-list --count
HEAD)" | tee -a $GITHUB_OUTPUT + ;; + storage-release) + echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + proxy-release) + echo "tag=release-proxy-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + compute-release) + echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT + ;; + pr-main|storage-rc-pr|compute-rc-pr|proxy-rc-pr) + BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') + echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT + ;; + *) + echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!" + exit 1 + esac + + - name: Get the previous release-tags + id: previous-releases + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + gh api --paginate \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/${GITHUB_REPOSITORY}/releases" \ + | jq -f .github/scripts/previous-releases.jq -r \ + | tee -a "${GITHUB_OUTPUT}" diff --git a/.github/workflows/_push-to-container-registry.yml b/.github/workflows/_push-to-container-registry.yml index 403d078988..2dab665f40 100644 --- a/.github/workflows/_push-to-container-registry.yml +++ b/.github/workflows/_push-to-container-registry.yml @@ -51,7 +51,7 @@ jobs: steps: - uses: actions/checkout@v4 with: - sparse-checkout: scripts/push_with_image_map.py + sparse-checkout: .github/scripts/push_with_image_map.py sparse-checkout-cone-mode: false - name: Print image-map @@ -99,6 +99,6 @@ jobs: password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} - name: Copy docker images to target registries - run: python scripts/push_with_image_map.py + run: python3 .github/scripts/push_with_image_map.py env: IMAGE_MAP: ${{ inputs.image-map }} diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 132f122b7c..1fd4f292e8 100644 --- a/.github/workflows/build_and_test.yml +++ 
b/.github/workflows/build_and_test.yml @@ -65,38 +65,11 @@ jobs: token: ${{ secrets.GITHUB_TOKEN }} filters: .github/file-filters.yaml - tag: + meta: needs: [ check-permissions ] - runs-on: [ self-hosted, small ] - container: ${{ vars.NEON_DEV_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_ECR_REGION }}.amazonaws.com/base:pinned - outputs: - build-tag: ${{steps.build-tag.outputs.tag}} - - steps: - # Need `fetch-depth: 0` to count the number of commits in the branch - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get build tag - run: | - echo run:$GITHUB_RUN_ID - echo ref:$GITHUB_REF_NAME - echo rev:$(git rev-list --count HEAD) - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - echo "tag=$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then - echo "tag=release-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then - echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release', 'release-proxy', 'release-compute'" - echo "tag=$GITHUB_RUN_ID" >> $GITHUB_OUTPUT - fi - shell: bash - id: build-tag + uses: ./.github/workflows/_meta.yml + with: + github-event-name: ${{ github.event_name }} build-build-tools-image: needs: [ check-permissions ] @@ -199,7 +172,7 @@ jobs: secrets: inherit build-and-test-locally: - needs: [ tag, build-build-tools-image ] + needs: [ meta, build-build-tools-image ] strategy: fail-fast: false matrix: @@ -213,7 +186,7 @@ jobs: with: arch: ${{ matrix.arch }} build-tools-image: ${{ needs.build-build-tools-image.outputs.image }}-bookworm - build-tag: ${{ needs.tag.outputs.build-tag }} + build-tag: ${{ needs.meta.outputs.build-tag }} build-type: ${{ matrix.build-type }} # Run tests on all Postgres versions in 
release builds and only on the latest version in debug builds. # Run without LFC on v17 release and debug builds only. For all the other cases LFC is enabled. @@ -497,13 +470,24 @@ jobs: }) trigger-e2e-tests: - if: ${{ !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') || github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' }} - needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, tag ] + # Depends on jobs that can get skipped + if: >- + ${{ + ( + !github.event.pull_request.draft + || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') + || contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) + ) && !failure() && !cancelled() + }} + needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ] uses: ./.github/workflows/trigger-e2e-tests.yml + with: + github-event-name: ${{ github.event_name }} secrets: inherit neon-image-arch: - needs: [ check-permissions, build-build-tools-image, tag ] + needs: [ check-permissions, build-build-tools-image, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} strategy: matrix: arch: [ x64, arm64 ] @@ -539,7 +523,7 @@ jobs: build-args: | ADDITIONAL_RUSTFLAGS=${{ matrix.arch == 'arm64' && '-Ctarget-feature=+lse -Ctarget-cpu=neoverse-n1' || '' }} GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-bookworm DEBIAN_VERSION=bookworm provenance: false @@ -549,10 +533,11 @@ jobs: cache-from: type=registry,ref=cache.neon.build/neon:cache-bookworm-${{ matrix.arch }} cache-to: ${{ github.ref_name 
== 'main' && format('type=registry,ref=cache.neon.build/neon:cache-{0}-{1},mode=max', 'bookworm', matrix.arch) || '' }} tags: | - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-${{ matrix.arch }} + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-${{ matrix.arch }} neon-image: - needs: [ neon-image-arch, tag ] + needs: [ neon-image-arch, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -567,13 +552,14 @@ jobs: - name: Create multi-arch image run: | - docker buildx imagetools create -t neondatabase/neon:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm \ - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-x64 \ - neondatabase/neon:${{ needs.tag.outputs.build-tag }}-bookworm-arm64 + docker buildx imagetools create -t neondatabase/neon:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm \ + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-x64 \ + neondatabase/neon:${{ needs.meta.outputs.build-tag }}-bookworm-arm64 compute-node-image-arch: - needs: [ check-permissions, build-build-tools-image, tag ] + needs: [ check-permissions, build-build-tools-image, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -631,7 +617,7 @@ jobs: build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} PG_VERSION=${{ matrix.version.pg }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }} 
DEBIAN_VERSION=${{ matrix.version.debian }} provenance: false @@ -641,7 +627,7 @@ jobs: cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} cache-to: ${{ github.ref_name == 'main' && format('type=registry,ref=cache.neon.build/compute-node-{0}:cache-{1}-{2},mode=max', matrix.version.pg, matrix.version.debian, matrix.arch) || '' }} tags: | - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-${{ matrix.arch }} - name: Build neon extensions test image if: matrix.version.pg >= 'v16' @@ -651,7 +637,7 @@ jobs: build-args: | GIT_VERSION=${{ github.event.pull_request.head.sha || github.sha }} PG_VERSION=${{ matrix.version.pg }} - BUILD_TAG=${{ needs.tag.outputs.build-tag }} + BUILD_TAG=${{ needs.meta.outputs.build-tag }} TAG=${{ needs.build-build-tools-image.outputs.image-tag }}-${{ matrix.version.debian }} DEBIAN_VERSION=${{ matrix.version.debian }} provenance: false @@ -661,10 +647,11 @@ jobs: target: extension-tests cache-from: type=registry,ref=cache.neon.build/compute-node-${{ matrix.version.pg }}:cache-${{ matrix.version.debian }}-${{ matrix.arch }} tags: | - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.tag.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{needs.meta.outputs.build-tag}}-${{ matrix.version.debian }}-${{ matrix.arch }} compute-node-image: - needs: [ compute-node-image-arch, tag ] + needs: [ compute-node-image-arch, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -692,21 +679,22 @@ jobs: - name: 
Create multi-arch compute-node image run: | - docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \ - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ - neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 + docker buildx imagetools create -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \ + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ + neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - name: Create multi-arch neon-test-extensions image if: matrix.version.pg >= 'v16' run: | - docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }} \ - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ - neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 + docker buildx imagetools create -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -t neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }} \ + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ 
needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ + neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 vm-compute-node-image: - needs: [ check-permissions, tag, compute-node-image ] + needs: [ check-permissions, meta, compute-node-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: [ self-hosted, large ] strategy: fail-fast: false @@ -742,23 +730,25 @@ jobs: # it won't have the proper authentication (written at v0.6.0) - name: Pulling compute-node image run: | - docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + docker pull neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} - name: Build vm image run: | ./vm-builder \ -size=2G \ -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \ - -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ - -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} \ + -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ -target-arch=linux/amd64 - name: Pushing vm-compute-node image run: | - docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.tag.outputs.build-tag }} + docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} test-images: - needs: [ check-permissions, tag, neon-image, compute-node-image ] + needs: [ check-permissions, meta, neon-image, compute-node-image ] + # Depends on jobs that can get skipped + if: "!failure() && !cancelled()" strategy: fail-fast: false matrix: @@ -776,17 +766,6 @@ jobs: username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} password: ${{ 
secrets.NEON_DOCKERHUB_PASSWORD }} - - name: Get the last compute release tag - id: get-last-compute-release-tag - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - run: | - tag=$(gh api -q '[.[].tag_name | select(startswith("release-compute"))][0]'\ - -H "Accept: application/vnd.github+json" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "/repos/${{ github.repository }}/releases") - echo tag=${tag} >> ${GITHUB_OUTPUT} - # `neondatabase/neon` contains multiple binaries, all of them use the same input for the version into the same version formatting library. # Pick pageserver as currently the only binary with extra "version" features printed in the string to verify. # Regular pageserver version string looks like @@ -796,8 +775,9 @@ jobs: # Ensure that we don't have bad versions. - name: Verify image versions shell: bash # ensure no set -e for better error messages + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} run: | - pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.tag.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") + pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") echo "Pageserver version string: $pageserver_version" @@ -814,7 +794,24 @@ jobs: - name: Verify docker-compose example and test extensions timeout-minutes: 20 env: - TAG: ${{needs.tag.outputs.build-tag}} + TAG: >- + ${{ + contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.build-tag + }} + COMPUTE_TAG: >- + ${{ + contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-compute-release + || needs.meta.outputs.build-tag + }} + TEST_EXTENSIONS_TAG: >- + ${{ + 
contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && 'latest' + || needs.meta.outputs.build-tag + }} TEST_VERSION_ONLY: ${{ matrix.pg_version }} run: ./docker-compose/docker_compose_test.sh @@ -826,10 +823,17 @@ jobs: - name: Test extension upgrade timeout-minutes: 20 - if: ${{ needs.tag.outputs.build-tag == github.run_id }} + if: ${{ contains(fromJSON('["pr-main", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} env: - NEWTAG: ${{ needs.tag.outputs.build-tag }} - OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }} + TAG: >- + ${{ + false + || needs.meta.outputs.run-kind == 'pr-main' && needs.meta.outputs.build-tag + || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release + }} + TEST_EXTENSIONS_TAG: latest + NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }} + OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }} run: ./docker-compose/test_extensions_upgrade.sh - name: Print logs and clean up @@ -839,7 +843,7 @@ jobs: docker compose --profile test-extensions -f ./docker-compose/docker-compose.yml down generate-image-maps: - needs: [ tag ] + needs: [ meta ] runs-on: ubuntu-22.04 outputs: neon-dev: ${{ steps.generate.outputs.neon-dev }} @@ -849,14 +853,14 @@ jobs: steps: - uses: actions/checkout@v4 with: - sparse-checkout: scripts/generate_image_maps.py + sparse-checkout: .github/scripts/generate_image_maps.py sparse-checkout-cone-mode: false - name: Generate Image Maps id: generate - run: python scripts/generate_image_maps.py + run: python3 .github/scripts/generate_image_maps.py env: - BUILD_TAG: "${{ needs.tag.outputs.build-tag }}" + BUILD_TAG: "${{ needs.meta.outputs.build-tag }}" BRANCH: "${{ github.ref_name }}" DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}" PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}" @@ -865,7 +869,8 @@ jobs: AWS_REGION: "${{ vars.AWS_ECR_REGION }}" push-neon-image-dev: - needs: [ generate-image-maps, 
neon-image ] + needs: [ meta, generate-image-maps, neon-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -882,7 +887,8 @@ jobs: secrets: inherit push-compute-image-dev: - needs: [ generate-image-maps, vm-compute-node-image ] + needs: [ meta, generate-image-maps, vm-compute-node-image ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -899,8 +905,8 @@ jobs: secrets: inherit push-neon-image-prod: - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ generate-image-maps, neon-image, test-images ] + needs: [ meta, generate-image-maps, neon-image, test-images ] + if: ${{ contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -917,8 +923,8 @@ jobs: secrets: inherit push-compute-image-prod: - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' - needs: [ generate-image-maps, vm-compute-node-image, test-images ] + needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ] + if: ${{ needs.meta.outputs.run-kind == 'compute-release' }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -937,18 +943,19 @@ jobs: # This is a bit of a special case so we're not using a generated image map. 
add-latest-tag-to-neon-extensions-test-image: if: github.ref_name == 'main' - needs: [ tag, compute-node-image ] + needs: [ meta, compute-node-image ] uses: ./.github/workflows/_push-to-container-registry.yml with: image-map: | { - "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], - "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.tag.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] } secrets: inherit trigger-custom-extensions-build-and-wait: - needs: [ check-permissions, tag ] + needs: [ check-permissions, meta ] + if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -983,7 +990,7 @@ jobs: \"ci_job_name\": \"build-and-upload-extensions\", \"commit_hash\": \"$COMMIT_SHA\", \"remote_repo\": \"${{ github.repository }}\", - \"compute_image_tag\": \"${{ needs.tag.outputs.build-tag }}\", + \"compute_image_tag\": \"${{ needs.meta.outputs.build-tag }}\", \"remote_branch_name\": \"${{ github.ref_name }}\" } }" @@ -1027,9 +1034,9 @@ jobs: exit 1 deploy: - needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, tag, build-and-test-locally, trigger-custom-extensions-build-and-wait ] - # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` - if: (github.ref_name == 'main' || github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute') 
&& !failure() && !cancelled() + needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ] + # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod` + if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -1040,108 +1047,103 @@ jobs: - uses: actions/checkout@v4 - name: Create git tag and GitHub release - if: github.ref_name == 'release' || github.ref_name == 'release-proxy' || github.ref_name == 'release-compute' + if: ${{ contains(fromJSON('["storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) }} uses: actions/github-script@v7 + env: + TAG: "${{ needs.meta.outputs.build-tag }}" + BRANCH: "${{ github.ref_name }}" + PREVIOUS_RELEASE: >- + ${{ + false + || needs.meta.outputs.run-kind == 'storage-release' && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.run-kind == 'proxy-release' && needs.meta.outputs.previous-proxy-release + || needs.meta.outputs.run-kind == 'compute-release' && needs.meta.outputs.previous-compute-release + || 'unknown' + }} with: retries: 5 script: | - const tag = "${{ needs.tag.outputs.build-tag }}"; - const branch = "${{ github.ref_name }}"; + const { TAG, BRANCH, PREVIOUS_RELEASE } = process.env try { const existingRef = await github.rest.git.getRef({ owner: context.repo.owner, repo: context.repo.repo, - ref: `tags/${tag}`, + ref: `tags/${TAG}`, }); if (existingRef.data.object.sha !== context.sha) { - throw new Error(`Tag ${tag} already exists but points to a different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`); + throw new Error(`Tag ${TAG} already exists but points to a 
different commit (expected: ${context.sha}, actual: ${existingRef.data.object.sha}).`); } - console.log(`Tag ${tag} already exists and points to ${context.sha} as expected.`); + console.log(`Tag ${TAG} already exists and points to ${context.sha} as expected.`); } catch (error) { if (error.status !== 404) { throw error; } - console.log(`Tag ${tag} does not exist. Creating it...`); + console.log(`Tag ${TAG} does not exist. Creating it...`); await github.rest.git.createRef({ owner: context.repo.owner, repo: context.repo.repo, - ref: `refs/tags/${tag}`, + ref: `refs/tags/${TAG}`, sha: context.sha, }); - console.log(`Tag ${tag} created successfully.`); + console.log(`Tag ${TAG} created successfully.`); } try { const existingRelease = await github.rest.repos.getReleaseByTag({ owner: context.repo.owner, repo: context.repo.repo, - tag: tag, + tag: TAG, }); - console.log(`Release for tag ${tag} already exists (ID: ${existingRelease.data.id}).`); + console.log(`Release for tag ${TAG} already exists (ID: ${existingRelease.data.id}).`); } catch (error) { if (error.status !== 404) { throw error; } - console.log(`Release for tag ${tag} does not exist. Creating it...`); + console.log(`Release for tag ${TAG} does not exist. Creating it...`); // Find the PR number using the commit SHA const pullRequests = await github.rest.pulls.list({ owner: context.repo.owner, repo: context.repo.repo, state: 'closed', - base: branch, + base: BRANCH, }); const pr = pullRequests.data.find(pr => pr.merge_commit_sha === context.sha); const prNumber = pr ? 
pr.number : null; - // Find the previous release on the branch - const releases = await github.rest.repos.listReleases({ - owner: context.repo.owner, - repo: context.repo.repo, - per_page: 100, - }); - - const branchReleases = releases.data - .filter((release) => { - const regex = new RegExp(`^${branch}-\\d+$`); - return regex.test(release.tag_name) && !release.draft && !release.prerelease; - }) - .sort((a, b) => new Date(b.created_at) - new Date(a.created_at)); - - const previousTag = branchReleases.length > 0 ? branchReleases[0].tag_name : null; - const releaseNotes = [ prNumber ? `Release PR https://github.com/${context.repo.owner}/${context.repo.repo}/pull/${prNumber}.` : 'Release PR not found.', - previousTag - ? `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${previousTag}...${tag}.` - : `No previous release found on branch ${branch}.`, + `Diff with the previous release https://github.com/${context.repo.owner}/${context.repo.repo}/compare/${PREVIOUS_RELEASE}...${TAG}.` ].join('\n\n'); await github.rest.repos.createRelease({ owner: context.repo.owner, repo: context.repo.repo, - tag_name: tag, + tag_name: TAG, body: releaseNotes, }); - console.log(`Release for tag ${tag} created successfully.`); + console.log(`Release for tag ${TAG} created successfully.`); } - name: Trigger deploy workflow env: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} + RUN_KIND: ${{ needs.meta.outputs.run-kind }} run: | - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.tag.outputs.build-tag}} -f deployPreprodRegion=false - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then + case ${RUN_KIND} in + push-main) + gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main -f branch=main -f dockerTag=${{needs.meta.outputs.build-tag}} -f deployPreprodRegion=false + ;; + storage-release) gh workflow --repo neondatabase/infra run 
deploy-dev.yml --ref main \ -f deployPgSniRouter=false \ -f deployProxy=false \ @@ -1149,7 +1151,7 @@ jobs: -f deployStorageBroker=true \ -f deployStorageController=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} \ + -f dockerTag=${{needs.meta.outputs.build-tag}} \ -f deployPreprodRegion=true gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \ @@ -1157,8 +1159,9 @@ jobs: -f deployStorageBroker=true \ -f deployStorageController=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then + -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + proxy-release) gh workflow --repo neondatabase/infra run deploy-dev.yml --ref main \ -f deployPgSniRouter=true \ -f deployProxy=true \ @@ -1166,7 +1169,7 @@ jobs: -f deployStorageBroker=false \ -f deployStorageController=false \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} \ + -f dockerTag=${{needs.meta.outputs.build-tag}} \ -f deployPreprodRegion=true gh workflow --repo neondatabase/infra run deploy-proxy-prod.yml --ref main \ @@ -1176,13 +1179,16 @@ jobs: -f deployProxyScram=true \ -f deployProxyAuthBroker=true \ -f branch=main \ - -f dockerTag=${{needs.tag.outputs.build-tag}} - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.tag.outputs.build-tag}} - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main', 'release', 'release-proxy' or 'release-compute'" + -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + compute-release) + gh workflow --repo neondatabase/infra run deploy-compute-dev.yml --ref main -f dockerTag=${{needs.meta.outputs.build-tag}} + ;; + *) + echo "RUN_KIND (value '${RUN_KIND}') is not set to either 'push-main', 'storage-release', 'proxy-release' or 'compute-release'" exit 1 - fi + ;; + esac notify-storage-release-deploy-failure: needs: [ 
deploy ] @@ -1207,7 +1213,7 @@ jobs: id-token: write # aws-actions/configure-aws-credentials statuses: write contents: read - # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-to-acr-dev` and `push-to-acr-prod` + # `!failure() && !cancelled()` is required because the workflow transitively depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod` if: github.ref_name == 'release' && !failure() && !cancelled() runs-on: ubuntu-22.04 @@ -1297,7 +1303,8 @@ jobs: pin-build-tools-image: needs: [ build-build-tools-image, test-images, build-and-test-locally ] - if: github.ref_name == 'main' + # `!failure() && !cancelled()` is required because the job (transitively) depends on jobs that can be skipped + if: github.ref_name == 'main' && !failure() && !cancelled() uses: ./.github/workflows/pin-build-tools-image.yml with: from-tag: ${{ needs.build-build-tools-image.outputs.image-tag }} @@ -1316,6 +1323,7 @@ jobs: # Format `needs` differently to make the list more readable. 
# Usually we do `needs: [...]` needs: + - meta - build-and-test-locally - check-codestyle-python - check-codestyle-rust @@ -1339,7 +1347,7 @@ jobs: || needs.check-codestyle-python.result == 'skipped' || needs.check-codestyle-rust.result == 'skipped' || needs.files-changed.result == 'skipped' - || needs.push-compute-image-dev.result == 'skipped' - || needs.push-neon-image-dev.result == 'skipped' + || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) + || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)) || needs.test-images.result == 'skipped' - || needs.trigger-custom-extensions-build-and-wait.result == 'skipped' + || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) diff --git a/.github/workflows/trigger-e2e-tests.yml b/.github/workflows/trigger-e2e-tests.yml index be6a7a7901..a30da35502 100644 --- a/.github/workflows/trigger-e2e-tests.yml +++ b/.github/workflows/trigger-e2e-tests.yml @@ -5,6 +5,10 @@ on: types: - ready_for_review workflow_call: + inputs: + github-event-name: + type: string + required: true defaults: run: @@ -19,7 +23,7 @@ jobs: if: ${{ !contains(github.event.pull_request.labels.*.name, 'run-no-ci') }} uses: ./.github/workflows/check-permissions.yml with: - github-event-name: ${{ github.event_name }} + github-event-name: ${{ inputs.github-event-name || github.event_name }} cancel-previous-e2e-tests: needs: [ check-permissions ] @@ -35,46 +39,29 @@ jobs: run cancel-previous-in-concurrency-group.yml \ --field concurrency_group="${{ env.E2E_CONCURRENCY_GROUP }}" - tag: - needs: [ check-permissions ] - runs-on: ubuntu-22.04 - outputs: - build-tag: ${{ 
steps.build-tag.outputs.tag }} - - steps: - # Need `fetch-depth: 0` to count the number of commits in the branch - - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Get build tag - env: - GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - CURRENT_BRANCH: ${{ github.head_ref || github.ref_name }} - CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} - run: | - if [[ "$GITHUB_REF_NAME" == "main" ]]; then - echo "tag=$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release" ]]; then - echo "tag=release-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-proxy" ]]; then - echo "tag=release-proxy-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - elif [[ "$GITHUB_REF_NAME" == "release-compute" ]]; then - echo "tag=release-compute-$(git rev-list --count HEAD)" >> $GITHUB_OUTPUT - else - echo "GITHUB_REF_NAME (value '$GITHUB_REF_NAME') is not set to either 'main' or 'release'" - BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') - echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT - fi - id: build-tag + meta: + uses: ./.github/workflows/_meta.yml + with: + github-event-name: ${{ inputs.github-event-name || github.event_name }} trigger-e2e-tests: - needs: [ tag ] + needs: [ meta ] runs-on: ubuntu-22.04 env: EVENT_ACTION: ${{ github.event.action }} GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} - TAG: ${{ needs.tag.outputs.build-tag }} + TAG: >- + ${{ + contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-storage-release + || needs.meta.outputs.build-tag + }} + COMPUTE_TAG: >- + ${{ + contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.previous-compute-release + || needs.meta.outputs.build-tag + }} steps: - name: Wait for 
`push-{neon,compute}-image-dev` job to finish # It's important to have a timeout here, the script in the step can run infinitely @@ -157,6 +144,6 @@ jobs: --raw-field "commit_hash=$COMMIT_SHA" \ --raw-field "remote_repo=${GITHUB_REPOSITORY}" \ --raw-field "storage_image_tag=${TAG}" \ - --raw-field "compute_image_tag=${TAG}" \ + --raw-field "compute_image_tag=${COMPUTE_TAG}" \ --raw-field "concurrency_group=${E2E_CONCURRENCY_GROUP}" \ --raw-field "e2e-platforms=${E2E_PLATFORMS}" diff --git a/docker-compose/docker-compose.yml b/docker-compose/docker-compose.yml index 489d60f38c..95d4ff7b2a 100644 --- a/docker-compose/docker-compose.yml +++ b/docker-compose/docker-compose.yml @@ -186,7 +186,7 @@ services: neon-test-extensions: profiles: ["test-extensions"] - image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TAG:-latest} + image: ${REPOSITORY:-neondatabase}/neon-test-extensions-v${PG_TEST_VERSION:-16}:${TEST_EXTENSIONS_TAG:-${TAG:-latest}} environment: - PGPASSWORD=cloud_admin entrypoint: diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index c2168c47af..c399109eb9 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -6,8 +6,8 @@ generate_id() { local -n resvar=$1 printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM } -if [ -z ${OLDTAG+x} ] || [ -z ${NEWTAG+x} ] || [ -z "${OLDTAG}" ] || [ -z "${NEWTAG}" ]; then - echo OLDTAG and NEWTAG must be defined +if [ -z ${OLD_COMPUTE_TAG+x} ] || [ -z ${NEW_COMPUTE_TAG+x} ] || [ -z "${OLD_COMPUTE_TAG}" ] || [ -z "${NEW_COMPUTE_TAG}" ]; then + echo OLD_COMPUTE_TAG and NEW_COMPUTE_TAG must be defined exit 1 fi export PG_VERSION=${PG_VERSION:-16} @@ -47,7 +47,7 @@ EXTENSIONS='[ {"extname": "pg_repack", "extdir": "pg_repack-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) -TAG=${NEWTAG} docker compose --profile test-extensions up --quiet-pull --build 
-d +COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" @@ -55,7 +55,7 @@ create_extensions "${EXTNAMES}" query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')" new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query") docker compose --profile test-extensions down -TAG=${OLDTAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate +COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" @@ -86,8 +86,8 @@ else ) result=$(curl "${PARAMS[@]}") echo $result | jq . - TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} TAG=${OLDTAG} docker compose down compute compute_is_ready - COMPUTE_TAG=${NEWTAG} TAG=${OLDTAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready + TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose down compute compute_is_ready + COMPUTE_TAG=${NEW_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready wait_for_ready TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") if [ ${TID} != ${new_timeline_id} ]; then From 30f3be98406c710ddf622c193907df2eb668f1ae Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Wed, 26 Feb 2025 11:19:14 -0600 Subject: [PATCH 033/207] fix(test): reduce number of relations in test_tx_abort_with_many_relations (#10997) ## Problem I see a lot of timeout errors, which indicates that this test is too slow. It seems that create relations are fast, but the subsequent truncating step is slow. ## Summary of changes Reduce number of relations for now, and investigate later. Signed-off-by: Alex Chi Z --- test_runner/regress/test_pg_regress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index afc7ef3e01..6a76ad5ca8 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -364,7 +364,7 @@ def test_tx_abort_with_many_relations( n = 4000 step = 4000 else: - n = 100000 + n = 20000 step = 5000 def create(): From c1a040447da9e901ccfc87fc9ac329be51dfa703 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Wed, 26 Feb 2025 20:38:44 +0300 Subject: [PATCH 034/207] walproposer: send valid timeline_start_lsn in v2 (#10994) ## Problem https://github.com/neondatabase/neon/pull/10647 dropped timeline_start_lsn from protocol messages as it can be taken from term history. In v2 0 was sent in the placeholder. However, until safekeepers are deployed with that PR they still use the value, setting timeline_start_lsn to 0, which confuses WAL reading; problem appears only when compute includes 10647 but safekeepers don't. ref https://neondb.slack.com/archives/C04DGM6SMTM/p1740577649644269?thread_ts=1740572363.541619&cid=C04DGM6SMTM ## Summary of changes Send real value instead of 0 in v2. 
--- pgxn/neon/walproposer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index d7604e30d7..356895aa82 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -1896,7 +1896,12 @@ PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf pq_sendint64_le(buf, m->termHistory->entries[i].term); pq_sendint64_le(buf, m->termHistory->entries[i].lsn); } - pq_sendint64_le(buf, 0); /* removed timeline_start_lsn */ + /* + * Removed timeline_start_lsn. Still send it as a valid + * value until safekeepers taking it from term history are + * deployed. + */ + pq_sendint64_le(buf, m->termHistory->entries[0].lsn); break; } case 'a': From 643a48210fca694eb5601b03c106a261b8a24d2a Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Wed, 26 Feb 2025 22:26:33 +0300 Subject: [PATCH 035/207] safekeeper: exclude API (#10757) ## Problem https://github.com/neondatabase/neon/pull/10241 added configuration switch endpoint, but it didn't delete timeline if node was excluded. ## Summary of changes Add separate /exclude API endpoint which similarly accepts membership configuration where sk is supposed to be excluded. Implementation deletes the timeline locally. Some more small related tweaks: - make mconf switch API PUT instead of POST as it is idempotent; - return 409 if switch was refused instead of 200 with requested & current; - remove unused was_active flag from delete response; - remove meaningless _force suffix from delete function names; - reuse timeline.rs delete_dir function in timelines_global_map instead of its own copy.
part of https://github.com/neondatabase/neon/issues/9965 --- libs/safekeeper_api/src/membership.rs | 11 ++- safekeeper/src/http/routes.rs | 108 ++++++++++++++++++++--- safekeeper/src/timeline.rs | 19 ++-- safekeeper/src/timelines_global_map.rs | 96 +++++++++++--------- test_runner/fixtures/safekeeper/http.py | 14 ++- test_runner/regress/test_wal_acceptor.py | 39 +++++--- 6 files changed, 212 insertions(+), 75 deletions(-) diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs index 4ccdd491b0..bb8934744a 100644 --- a/libs/safekeeper_api/src/membership.rs +++ b/libs/safekeeper_api/src/membership.rs @@ -85,12 +85,12 @@ impl MemberSet { Ok(MemberSet { m: members }) } - pub fn contains(&self, sk: &SafekeeperId) -> bool { - self.m.iter().any(|m| m.id == sk.id) + pub fn contains(&self, sk: NodeId) -> bool { + self.m.iter().any(|m| m.id == sk) } pub fn add(&mut self, sk: SafekeeperId) -> anyhow::Result<()> { - if self.contains(&sk) { + if self.contains(sk.id) { bail!(format!( "sk {} is already member of the set {}", sk.id, self @@ -130,6 +130,11 @@ impl Configuration { new_members: None, } } + + /// Is `sk_id` member of the configuration? 
+ pub fn contains(&self, sk_id: NodeId) -> bool { + self.members.contains(sk_id) || self.new_members.as_ref().is_some_and(|m| m.contains(sk_id)) + } } impl Display for Configuration { diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 3b3bc71ac4..4f47331c85 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -19,7 +19,7 @@ use safekeeper_api::models::{ AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry, TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest, }; -use safekeeper_api::{ServerInfo, models}; +use safekeeper_api::{ServerInfo, membership, models}; use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; use tokio::sync::mpsc; use tokio::task; @@ -32,7 +32,7 @@ use utils::lsn::Lsn; use crate::debug_dump::TimelineDigestRequest; use crate::safekeeper::TermLsn; -use crate::timelines_global_map::TimelineDeleteForceResult; +use crate::timelines_global_map::{DeleteOrExclude, TimelineDeleteResult}; use crate::{ GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, patch_control_file, pull_timeline, }; @@ -73,10 +73,13 @@ async fn tenant_delete_handler(mut request: Request) -> Result) -> Result>(), + .collect::>(), ) } @@ -208,12 +211,15 @@ async fn timeline_delete_handler(mut request: Request) -> Result) -> Result for ApiError { + fn from(de: DeleteOrExcludeError) -> ApiError { + match de { + DeleteOrExcludeError::Conflict { + requested: _, + current: _, + } => ApiError::Conflict(de.to_string()), + DeleteOrExcludeError::Other(e) => ApiError::InternalServerError(e), + } + } +} + +/// Remove timeline locally after this node has been excluded from the +/// membership configuration. 
The body is the same as in the membership endpoint +/// -- conf where node is excluded -- and in principle single ep could be used +/// for both actions, but since this is a data deletion op let's keep them +/// separate. +async fn timeline_exclude_handler(mut request: Request) -> Result, ApiError> { + let ttid = TenantTimelineId::new( + parse_request_param(&request, "tenant_id")?, + parse_request_param(&request, "timeline_id")?, + ); + check_permission(&request, Some(ttid.tenant_id))?; + + let global_timelines = get_global_timelines(&request); + let data: models::TimelineMembershipSwitchRequest = json_request(&mut request).await?; + let my_id = get_conf(&request).my_id; + // If request doesn't exclude us, membership switch endpoint should be used + // instead. + if data.mconf.contains(my_id) { + return Err(ApiError::Forbidden(format!( + "refused to switch into {}, node {} is member of it", + data.mconf, my_id + ))); + } + let action = DeleteOrExclude::Exclude(data.mconf); + + let resp = global_timelines + .delete_or_exclude(&ttid, action) + .await + .map_err(ApiError::from)?; + json_response(StatusCode::OK, resp) +} + /// Consider switching timeline membership configuration to the provided one. async fn timeline_membership_handler( mut request: Request, @@ -281,12 +345,29 @@ async fn timeline_membership_handler( let tli = global_timelines.get(ttid).map_err(ApiError::from)?; let data: models::TimelineMembershipSwitchRequest = json_request(&mut request).await?; + let my_id = get_conf(&request).my_id; + // If request excludes us, exclude endpoint should be used instead. + if !data.mconf.contains(my_id) { + return Err(ApiError::Forbidden(format!( + "refused to switch into {}, node {} is not a member of it", + data.mconf, my_id + ))); + } + let req_gen = data.mconf.generation; let response = tli .membership_switch(data.mconf) .await .map_err(ApiError::InternalServerError)?; - json_response(StatusCode::OK, response) + // Return 409 if request was ignored. 
+ if req_gen == response.current_conf.generation { + json_response(StatusCode::OK, response) + } else { + Err(ApiError::Conflict(format!( + "request to switch into {} ignored, current generation {}", + req_gen, response.current_conf.generation + ))) + } } async fn timeline_copy_handler(mut request: Request) -> Result, ApiError> { @@ -637,11 +718,14 @@ pub fn make_router( .post("/v1/pull_timeline", |r| { request_span(r, timeline_pull_handler) }) + .put("/v1/tenant/:tenant_id/timeline/:timeline_id/exclude", |r| { + request_span(r, timeline_exclude_handler) + }) .get( "/v1/tenant/:tenant_id/timeline/:timeline_id/snapshot/:destination_id", |r| request_span(r, timeline_snapshot_handler), ) - .post( + .put( "/v1/tenant/:tenant_id/timeline/:timeline_id/membership", |r| request_span(r, timeline_membership_handler), ) diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index c140f16ced..930f66a207 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -558,11 +558,18 @@ impl Timeline { }); } - /// Background timeline activities (which hold Timeline::gate) will no - /// longer run once this function completes. - pub async fn shutdown(&self) { + /// Cancel the timeline, requesting background activity to stop. Closing + /// the `self.gate` waits for that. + pub async fn cancel(&self) { info!("timeline {} shutting down", self.ttid); self.cancel.cancel(); + } + + /// Background timeline activities (which hold Timeline::gate) will no + /// longer run once this function completes. `Self::cancel` must have been + /// already called. + pub async fn close(&self) { + assert!(self.cancel.is_cancelled()); // Wait for any concurrent tasks to stop using this timeline, to avoid e.g. attempts // to read deleted files. @@ -574,13 +581,13 @@ impl Timeline { /// Also deletes WAL in s3. Might fail if e.g. s3 is unavailable, but /// deletion API endpoint is retriable. /// - /// Timeline must be in shut-down state (i.e. 
call [`Self::shutdown`] first) + /// Timeline must be in shut-down state (i.e. call [`Self::close`] first) pub async fn delete( &self, shared_state: &mut WriteGuardSharedState<'_>, only_local: bool, ) -> Result { - // Assert that [`Self::shutdown`] was already called + // Assert that [`Self::close`] was already called assert!(self.cancel.is_cancelled()); assert!(self.gate.close_complete()); @@ -1106,7 +1113,7 @@ impl ManagerTimeline { } /// Deletes directory and it's contents. Returns false if directory does not exist. -async fn delete_dir(path: &Utf8PathBuf) -> Result { +pub async fn delete_dir(path: &Utf8PathBuf) -> Result { match fs::remove_dir_all(path).await { Ok(_) => Ok(true), Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 1d29030711..858dfce807 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -4,16 +4,15 @@ use std::collections::HashMap; use std::str::FromStr; -use std::sync::atomic::Ordering; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; use anyhow::{Context, Result, bail}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; -use safekeeper_api::ServerInfo; use safekeeper_api::membership::Configuration; use safekeeper_api::models::SafekeeperUtilization; +use safekeeper_api::{ServerInfo, membership}; use serde::Serialize; use tokio::fs; use tracing::*; @@ -22,9 +21,10 @@ use utils::id::{TenantId, TenantTimelineId, TimelineId}; use utils::lsn::Lsn; use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; +use crate::http::routes::DeleteOrExcludeError; use crate::rate_limit::RateLimiter; use crate::state::TimelinePersistentState; -use crate::timeline::{Timeline, TimelineError, get_tenant_dir, get_timeline_dir}; +use crate::timeline::{Timeline, TimelineError, delete_dir, get_tenant_dir, get_timeline_dir}; use crate::timelines_set::TimelinesSet; use 
crate::wal_storage::Storage; use crate::{SafeKeeperConf, control_file, wal_storage}; @@ -448,23 +448,20 @@ impl GlobalTimelines { .collect() } - /// Cancels timeline, then deletes the corresponding data directory. - /// If only_local, doesn't remove WAL segments in remote storage. - pub(crate) async fn delete( + /// Delete timeline, only locally on this node or globally (also cleaning + /// remote storage WAL), depending on `action` value. + pub(crate) async fn delete_or_exclude( &self, ttid: &TenantTimelineId, - only_local: bool, - ) -> Result { + action: DeleteOrExclude, + ) -> Result { let tli_res = { let state = self.state.lock().unwrap(); if state.tombstones.contains_key(ttid) { // Presence of a tombstone guarantees that a previous deletion has completed and there is no work to do. info!("Timeline {ttid} was already deleted"); - return Ok(TimelineDeleteForceResult { - dir_existed: false, - was_active: false, - }); + return Ok(TimelineDeleteResult { dir_existed: false }); } state.get(ttid) @@ -472,32 +469,47 @@ impl GlobalTimelines { let result = match tli_res { Ok(timeline) => { - let was_active = timeline.broker_active.load(Ordering::Relaxed); + info!("deleting timeline {}, action={:?}", ttid, action); - info!("deleting timeline {}, only_local={}", ttid, only_local); - timeline.shutdown().await; + // If node is getting excluded, check the generation first. + // Then, while holding the lock cancel the timeline; it will be + // unusable after this point, and if node is added back first + // deletion must be completed and node seeded anew. + // + // We would like to avoid holding the lock while waiting for the + // gate to finish as this is deadlock prone, so for actual + // deletion will take it second time. 
+ if let DeleteOrExclude::Exclude(ref mconf) = action { + let shared_state = timeline.read_shared_state().await; + if shared_state.sk.state().mconf.generation > mconf.generation { + return Err(DeleteOrExcludeError::Conflict { + requested: mconf.clone(), + current: shared_state.sk.state().mconf.clone(), + }); + } + timeline.cancel().await; + } else { + timeline.cancel().await; + } + + timeline.close().await; info!("timeline {ttid} shut down for deletion"); // Take a lock and finish the deletion holding this mutex. let mut shared_state = timeline.write_shared_state().await; + let only_local = !matches!(action, DeleteOrExclude::Delete); let dir_existed = timeline.delete(&mut shared_state, only_local).await?; - Ok(TimelineDeleteForceResult { - dir_existed, - was_active, // TODO: we probably should remove this field - }) + Ok(TimelineDeleteResult { dir_existed }) } Err(_) => { // Timeline is not memory, but it may still exist on disk in broken state. let dir_path = get_timeline_dir(self.state.lock().unwrap().conf.as_ref(), ttid); - let dir_existed = delete_dir(dir_path)?; + let dir_existed = delete_dir(&dir_path).await?; - Ok(TimelineDeleteForceResult { - dir_existed, - was_active: false, - }) + Ok(TimelineDeleteResult { dir_existed }) } }; @@ -515,11 +527,11 @@ impl GlobalTimelines { /// retry tenant deletion again later. /// /// If only_local, doesn't remove WAL segments in remote storage. 
- pub async fn delete_force_all_for_tenant( + pub async fn delete_all_for_tenant( &self, tenant_id: &TenantId, - only_local: bool, - ) -> Result> { + action: DeleteOrExclude, + ) -> Result> { info!("deleting all timelines for tenant {}", tenant_id); let to_delete = self.get_all_for_tenant(*tenant_id); @@ -527,7 +539,7 @@ impl GlobalTimelines { let mut deleted = HashMap::new(); for tli in &to_delete { - match self.delete(&tli.ttid, only_local).await { + match self.delete_or_exclude(&tli.ttid, action.clone()).await { Ok(result) => { deleted.insert(tli.ttid, result); } @@ -541,17 +553,15 @@ impl GlobalTimelines { // If there was an error, return it. if let Some(e) = err { - return Err(e); + return Err(anyhow::Error::from(e)); } // There may be broken timelines on disk, so delete the whole tenant dir as well. // Note that we could concurrently create new timelines while we were deleting them, // so the directory may be not empty. In this case timelines will have bad state // and timeline background jobs can panic. - delete_dir(get_tenant_dir( - self.state.lock().unwrap().conf.as_ref(), - tenant_id, - ))?; + let tenant_dir = get_tenant_dir(self.state.lock().unwrap().conf.as_ref(), tenant_id); + delete_dir(&tenant_dir).await?; Ok(deleted) } @@ -570,18 +580,20 @@ impl GlobalTimelines { } #[derive(Clone, Copy, Serialize)] -pub struct TimelineDeleteForceResult { +pub struct TimelineDeleteResult { pub dir_existed: bool, - pub was_active: bool, } -/// Deletes directory and it's contents. Returns false if directory does not exist. -fn delete_dir(path: Utf8PathBuf) -> Result { - match std::fs::remove_dir_all(path) { - Ok(_) => Ok(true), - Err(e) if e.kind() == std::io::ErrorKind::NotFound => Ok(false), - Err(e) => Err(e.into()), - } +/// Action for delete_or_exclude. +#[derive(Clone, Debug)] +pub enum DeleteOrExclude { + /// Delete timeline globally. 
+ Delete, + /// Legacy mode until we fully migrate to generations: like exclude deletes + /// timeline only locally, but ignores generation number. + DeleteLocal, + /// This node is getting excluded, delete timeline locally. + Exclude(membership::Configuration), } /// Create temp directory for a new timeline. It needs to be located on the same diff --git a/test_runner/fixtures/safekeeper/http.py b/test_runner/fixtures/safekeeper/http.py index 493ce7334e..7038d87aba 100644 --- a/test_runner/fixtures/safekeeper/http.py +++ b/test_runner/fixtures/safekeeper/http.py @@ -273,10 +273,22 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter): assert isinstance(res_json, dict) return res_json + def timeline_exclude( + self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration + ) -> dict[str, Any]: + res = self.put( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/exclude", + data=to.to_json(), + ) + res.raise_for_status() + res_json = res.json() + assert isinstance(res_json, dict) + return res_json + def membership_switch( self, tenant_id: TenantId, timeline_id: TimelineId, to: Configuration ) -> TimelineMembershipSwitchResponse: - res = self.post( + res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/membership", data=to.to_json(), ) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index fd9edb359b..0a05189bfb 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1686,7 +1686,7 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): @pytest.mark.parametrize("auth_enabled", [False, True]) -def test_delete_force(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): +def test_delete(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): neon_env_builder.auth_enabled = auth_enabled env = neon_env_builder.init_start() @@ -2215,13 +2215,21 @@ def 
test_membership_api(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 1 env = neon_env_builder.init_start() + # These are expected after timeline deletion on safekeepers. + env.pageserver.allowed_errors.extend( + [ + ".*Timeline .* was not found in global map.*", + ".*Timeline .* was cancelled and cannot be used anymore.*", + ] + ) + tenant_id = env.initial_tenant timeline_id = env.initial_timeline sk = env.safekeepers[0] http_cli = sk.http_client() - sk_id_1 = SafekeeperId(env.safekeepers[0].id, "localhost", sk.port.pg_tenant_only) + sk_id_1 = SafekeeperId(sk.id, "localhost", sk.port.pg_tenant_only) sk_id_2 = SafekeeperId(11, "localhost", 5434) # just a mock # Request to switch before timeline creation should fail. @@ -2249,19 +2257,28 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder): log.info(f"conf after restart: {after_restart}") assert after_restart.generation == 4 - # Switch into disjoint conf. - non_joint = Configuration(generation=5, members=[sk_id_2], new_members=None) + # Switch into non joint conf of which sk is not a member, must fail. + non_joint_not_member = Configuration(generation=5, members=[sk_id_2], new_members=None) + with pytest.raises(requests.exceptions.HTTPError): + resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint_not_member) + + # Switch into good non joint conf. + non_joint = Configuration(generation=6, members=[sk_id_1], new_members=None) resp = http_cli.membership_switch(tenant_id, timeline_id, non_joint) log.info(f"non joint switch resp: {resp}") assert resp.previous_conf.generation == 4 - assert resp.current_conf.generation == 5 + assert resp.current_conf.generation == 6 - # Switch request to lower conf should be ignored. 
- lower_conf = Configuration(generation=3, members=[], new_members=None) - resp = http_cli.membership_switch(tenant_id, timeline_id, lower_conf) - log.info(f"lower switch resp: {resp}") - assert resp.previous_conf.generation == 5 - assert resp.current_conf.generation == 5 + # Switch request to lower conf should be rejected. + lower_conf = Configuration(generation=3, members=[sk_id_1], new_members=None) + with pytest.raises(requests.exceptions.HTTPError): + http_cli.membership_switch(tenant_id, timeline_id, lower_conf) + + # Now, exclude sk from the membership, timeline should be deleted. + excluded_conf = Configuration(generation=7, members=[sk_id_2], new_members=None) + http_cli.timeline_exclude(tenant_id, timeline_id, excluded_conf) + with pytest.raises(requests.exceptions.HTTPError): + http_cli.timeline_status(tenant_id, timeline_id) # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries From 5cfdb1244f7ebdf2844e9b5f8e15af01389e653b Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Wed, 26 Feb 2025 21:27:16 +0200 Subject: [PATCH 036/207] compute_ctl: Add OTEL tracing to incoming HTTP requests and startup (#10971) We lost this with the switch to axum for the HTTP server. Add it back. In addition to just resurrecting the functionality we had before, pass the tracing context of the /configure HTTP request to the start_postgres operation that runs in the main thread. This way, the 'start_postgres' and all its sub-spans like getting the basebackup become children of the HTTP request span. This allows end-to-end tracing of a compute start, all the way from the proxy to the SQL queries executed by compute_ctl as part of compute startup. 
--- Cargo.lock | 24 ++++++++++++++++++---- Cargo.toml | 4 ++++ compute_tools/Cargo.toml | 2 ++ compute_tools/src/bin/compute_ctl.rs | 15 ++++++++++++++ compute_tools/src/compute.rs | 17 +++++++++++++++ compute_tools/src/http/routes/configure.rs | 7 ++++++- compute_tools/src/http/server.rs | 1 + 7 files changed, 65 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 47552174d2..7d11f2b7fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1342,7 +1342,9 @@ dependencies = [ "tokio-util", "tower 0.5.2", "tower-http", + "tower-otel", "tracing", + "tracing-opentelemetry", "tracing-subscriber", "tracing-utils", "url", @@ -4484,18 +4486,18 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.0" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c95a7476719eab1e366eaf73d0260af3021184f18177925b07f54b30089ceead" +checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.0" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39407670928234ebc5e6e580247dd567ad73a3578460c5990f9503df207e8f07" +checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", @@ -7294,6 +7296,20 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" +[[package]] +name = "tower-otel" +version = "0.2.0" +source = "git+https://github.com/mattiapenati/tower-otel?rev=56a7321053bcb72443888257b622ba0d43a11fcd#56a7321053bcb72443888257b622ba0d43a11fcd" +dependencies = [ + "http 1.1.0", + "opentelemetry", + "pin-project", + "tower-layer", + "tower-service", + "tracing", + "tracing-opentelemetry", +] + [[package]] name = "tower-service" version = "0.3.3" diff --git a/Cargo.toml b/Cargo.toml index 
e6ca3c982c..223ff4249e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -193,6 +193,10 @@ toml_edit = "0.22" tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]} tower = { version = "0.5.2", default-features = false } tower-http = { version = "0.6.2", features = ["request-id", "trace"] } + +# This revision uses opentelemetry 0.27. There's no tag for it. +tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" } + tower-service = "0.3.3" tracing = "0.1" tracing-error = "0.2" diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index ba2c304141..8f3bcbeef8 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -46,7 +46,9 @@ tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio-postgres.workspace = true tokio-util.workspace = true tokio-stream.workspace = true +tower-otel.workspace = true tracing.workspace = true +tracing-opentelemetry.workspace = true tracing-subscriber.workspace = true tracing-utils.workspace = true thiserror.workspace = true diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index efe707cb7c..6dae1a2753 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -406,6 +406,21 @@ fn start_postgres( ) -> Result<(Option, StartPostgresResult)> { // We got all we need, update the state. let mut state = compute.state.lock().unwrap(); + + // Create a tracing span for the startup operation. + // + // We could otherwise just annotate the function with #[instrument], but if + // we're being configured from a /configure HTTP request, we want the + // startup to be considered part of the /configure request. + let _this_entered = { + // Temporarily enter the /configure request's span, so that the new span + // becomes its child. 
+ let _parent_entered = state.startup_span.take().map(|p| p.entered()); + + tracing::info_span!("start_postgres") + } + .entered(); + state.set_status(ComputeStatus::Init, &compute.state_changed); info!( diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index e3c70ba622..27dc05e71f 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -110,7 +110,23 @@ pub struct ComputeState { /// compute wasn't used since start. pub last_active: Option>, pub error: Option, + + /// Compute spec. This can be received from the CLI or - more likely - + /// passed by the control plane with a /configure HTTP request. pub pspec: Option, + + /// If the spec is passed by a /configure request, 'startup_span' is the + /// /configure request's tracing span. The main thread enters it when it + /// processes the compute startup, so that the compute startup is considered + /// to be part of the /configure request for tracing purposes. + /// + /// If the request handling thread/task called startup_compute() directly, + /// it would automatically be a child of the request handling span, and we + /// wouldn't need this. But because we use the main thread to perform the + /// startup, and the /configure task just waits for it to finish, we need to + /// set up the span relationship ourselves. 
+ pub startup_span: Option, + pub metrics: ComputeMetrics, } @@ -122,6 +138,7 @@ impl ComputeState { last_active: None, error: None, pspec: None, + startup_span: None, metrics: ComputeMetrics::default(), } } diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs index a2892196b7..63d428fff4 100644 --- a/compute_tools/src/http/routes/configure.rs +++ b/compute_tools/src/http/routes/configure.rs @@ -45,13 +45,18 @@ pub(in crate::http) async fn configure( return JsonResponse::invalid_status(state.status); } + // Pass the tracing span to the main thread that performs the startup, + // so that the start_compute operation is considered a child of this + // configure request for tracing purposes. + state.startup_span = Some(tracing::Span::current()); + state.pspec = Some(pspec); state.set_status(ComputeStatus::ConfigurationPending, &compute.state_changed); drop(state); } // Spawn a blocking thread to wait for compute to become Running. This is - // needed to do not block the main pool of workers and be able to serve + // needed to not block the main pool of workers and to be able to serve // other requests while some particular request is waiting for compute to // finish configuration. let c = compute.clone(); diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index efd18afc78..7283401bb5 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -121,6 +121,7 @@ impl From for Router> { ) .layer(PropagateRequestIdLayer::x_request_id()), ) + .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO)) } } From 11aab9f0deaa77d6b8335605220e9a7cdf2f4049 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Wed, 26 Feb 2025 13:50:10 -0600 Subject: [PATCH 037/207] fix(pageserver): further stablize gc-compaction tests (#10975) ## Problem Yet another source of flakiness for https://github.com/neondatabase/neon/issues/10517 ## Summary of changes The test scenario we want to create is that we have an image layer in index_part and then overwrite it, so we have to ensure it gets persisted in index_part by doing a force checkpoint. Signed-off-by: Alex Chi Z --- test_runner/regress/test_compaction.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index ce8ed3c7c5..0df88e14c2 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -319,8 +319,12 @@ def test_pageserver_gc_compaction_idempotent( }, ) wait_until(compaction_finished, timeout=60) + workload.validate(env.pageserver.id) + # Ensure all data are uploaded so that the duplicated layer gets into index_part.json + ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_flushed=True) if compaction_mode == "after_restart": env.pageserver.restart(True) + workload.validate(env.pageserver.id) ps_http.timeline_gc( tenant_id, timeline_id, None ) # Force refresh gc info to have gc_cutoff generated @@ -335,6 +339,7 @@ def test_pageserver_gc_compaction_idempotent( "sub_compaction_max_job_size_mb": 16, }, ) + workload.validate(env.pageserver.id) wait_until(compaction_finished, timeout=60) # ensure gc_compaction is scheduled and it's actually running (instead of skipping due to no layers picked) From 4f4a3910d0f49602ca14eb259e5e5dca0ef60c29 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Wed, 26 Feb 2025 20:54:46 +0100 Subject: [PATCH 038/207] fix error (Line: 74, Col: 26): Unexpected value 'false' (#10999) ## Problem Check neon with extra platform builds is failing on main with: ``` The template is not valid. 
.github/workflows/neon_extra_builds.yml (Line: 74, Col: 26): Unexpected value 'false' ``` https://github.com/neondatabase/neon/actions/runs/13549634905 ## Summary of changes Use `fromJson()` to get `false` as a boolean value. Thanks to @skyzh for pointing out the issue --- .github/workflows/neon_extra_builds.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/neon_extra_builds.yml b/.github/workflows/neon_extra_builds.yml index f077e04d1c..90318747b3 100644 --- a/.github/workflows/neon_extra_builds.yml +++ b/.github/workflows/neon_extra_builds.yml @@ -71,7 +71,7 @@ jobs: uses: ./.github/workflows/build-macos.yml with: pg_versions: ${{ needs.files-changed.outputs.postgres_changes }} - rebuild_rust_code: ${{ needs.files-changed.outputs.rebuild_rust_code }} + rebuild_rust_code: ${{ fromJson(needs.files-changed.outputs.rebuild_rust_code) }} rebuild_everything: ${{ fromJson(needs.files-changed.outputs.rebuild_everything) }} gather-rust-build-stats: From c50b38ab722f080ad719d116a8c76f79e472cd76 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Feb 2025 01:38:45 +0200 Subject: [PATCH 039/207] compute_ctl: Fix comment on start_postgres (#11005) The comment was woefully outdated and outright wrong. It applied a long time ago (before commit e5cc2f92c4 to be precise), but nowadays the function just launches postgres and waits until it starts accepting connections. The other things the comment talked about are done in other functions. --- compute_tools/src/compute.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 27dc05e71f..97fa45062b 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -788,8 +788,9 @@ impl ComputeNode { Ok(()) } - /// Start Postgres as a child process and manage DBs/roles. - /// After that this will hang waiting on the postmaster process to exit. 
+ /// Start Postgres as a child process and wait for it to start accepting + /// connections. + /// /// Returns a handle to the child process and a handle to the logs thread. #[instrument(skip_all)] pub fn start_postgres( From 8b86cd115408942767175c8b9544babd18600209 Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Thu, 27 Feb 2025 09:13:30 +0300 Subject: [PATCH 040/207] safekeeper: follow membership configuration rules (#10781) ## Problem safekeepers must ignore walproposer messages with non matching membership conf. ## Summary of changes Make safekeepers reject vote request, proposer elected and append request messages with non matching generation. Switch to the configuration in the greeting message if it is higher. In passing, fix one comment and WAL truncation. Last part of https://github.com/neondatabase/neon/issues/9965 --- safekeeper/src/safekeeper.rs | 77 ++++++++++++++++++++++++++++++----- safekeeper/src/state.rs | 2 +- safekeeper/src/wal_storage.rs | 1 + 3 files changed, 69 insertions(+), 11 deletions(-) diff --git a/safekeeper/src/safekeeper.rs b/safekeeper/src/safekeeper.rs index 0edac04b97..886cac869d 100644 --- a/safekeeper/src/safekeeper.rs +++ b/safekeeper/src/safekeeper.rs @@ -279,7 +279,7 @@ pub struct VoteResponse { * Proposer -> Acceptor message announcing proposer is elected and communicating * term history to it. */ -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ProposerElected { pub generation: Generation, // membership conf generation pub term: Term, @@ -1013,6 +1013,9 @@ where self.state.finish_change(&state).await?; } + // Switch into conf given by proposer conf if it is higher. 
+ self.state.membership_switch(msg.mconf.clone()).await?; + let apg = AcceptorGreeting { node_id: self.node_id, mconf: self.state.mconf.clone(), @@ -1030,16 +1033,18 @@ where &mut self, msg: &VoteRequest, ) -> Result> { + if self.state.mconf.generation != msg.generation { + bail!( + "refusing {:?} due to generation mismatch: sk generation {}", + msg, + self.state.mconf.generation + ); + } // Once voted, we won't accept data from older proposers; flush // everything we've already received so that new proposer starts - // streaming at end of our WAL, without overlap. Currently we truncate - // WAL at streaming point, so this avoids truncating already committed - // WAL. - // - // TODO: it would be smoother to not truncate committed piece at - // handle_elected instead. Currently not a big deal, as proposer is the - // only source of WAL; with peer2peer recovery it would be more - // important. + // streaming at end of our WAL, without overlap. WAL is truncated at + // streaming point and commit_lsn may be advanced from peers, so this + // also avoids possible spurious attempt to truncate committed WAL. self.wal_store.flush_wal().await?; // initialize with refusal let mut resp = VoteResponse { @@ -1093,6 +1098,13 @@ where self.get_last_log_term(), self.flush_lsn() ); + if self.state.mconf.generation != msg.generation { + bail!( + "refusing {:?} due to generation mismatch: sk generation {}", + msg, + self.state.mconf.generation + ); + } if self.state.acceptor_state.term < msg.term { let mut state = self.state.start_change(); state.acceptor_state.term = msg.term; @@ -1263,11 +1275,24 @@ where msg: &AppendRequest, require_flush: bool, ) -> Result> { + // Refuse message on generation mismatch. On reconnect wp will get full + // configuration from greeting. 
+ if self.state.mconf.generation != msg.h.generation { + bail!( + "refusing append request due to generation mismatch: request {}, sk {}", + msg.h.generation, + self.state.mconf.generation + ); + } + if self.state.acceptor_state.term < msg.h.term { bail!("got AppendRequest before ProposerElected"); } - // If our term is higher, immediately refuse the message. + // If our term is higher, immediately refuse the message. Send term only + // response; elected walproposer can never advance the term, so it will + // figure out the refusal from it -- which is important as term change + // should cause not just reconnection but whole walproposer re-election. if self.state.acceptor_state.term > msg.h.term { let resp = AppendResponse::term_only( self.state.mconf.generation, @@ -1468,6 +1493,13 @@ mod tests { let wal_store = DummyWalStore { lsn: Lsn(0) }; let mut sk = SafeKeeper::new(TimelineState::new(storage), wal_store, NodeId(0)).unwrap(); + // Vote with generation mismatch should be rejected. 
+ let gen_mismatch_vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { + generation: SafekeeperGeneration::new(42), + term: 1, + }); + assert!(sk.process_msg(&gen_mismatch_vote_request).await.is_err()); + // check voting for 1 is ok let vote_request = ProposerAcceptorMessage::VoteRequest(VoteRequest { generation: Generation::new(0), @@ -1532,6 +1564,16 @@ mod tests { }, ]), }; + + // check that elected msg with generation mismatch is rejected + let mut pem_gen_mismatch = pem.clone(); + pem_gen_mismatch.generation = SafekeeperGeneration::new(42); + assert!( + sk.process_msg(&ProposerAcceptorMessage::Elected(pem_gen_mismatch)) + .await + .is_err() + ); + sk.process_msg(&ProposerAcceptorMessage::Elected(pem)) .await .unwrap(); @@ -1590,6 +1632,21 @@ mod tests { wal_data: Bytes::from_static(b"b"), }; + // check that append request with generation mismatch is rejected + let mut ar_hdr_gen_mismatch = ar_hdr.clone(); + ar_hdr_gen_mismatch.generation = SafekeeperGeneration::new(42); + let append_request_gen_mismatch = AppendRequest { + h: ar_hdr_gen_mismatch, + wal_data: Bytes::from_static(b"b"), + }; + assert!( + sk.process_msg(&ProposerAcceptorMessage::AppendRequest( + append_request_gen_mismatch + )) + .await + .is_err() + ); + // do write ending at 2, it should be ok sk.process_msg(&ProposerAcceptorMessage::AppendRequest(append_request)) .await diff --git a/safekeeper/src/state.rs b/safekeeper/src/state.rs index e437e6d2cd..7533005c35 100644 --- a/safekeeper/src/state.rs +++ b/safekeeper/src/state.rs @@ -268,7 +268,7 @@ where // Is switch allowed? 
if to.generation <= self.mconf.generation { info!( - "ignoring request to switch membership conf to lower {}, current conf {}", + "ignoring request to switch membership conf to {}, current conf {}", to, self.mconf ); } else { diff --git a/safekeeper/src/wal_storage.rs b/safekeeper/src/wal_storage.rs index ed197a3f83..f0bac4b40a 100644 --- a/safekeeper/src/wal_storage.rs +++ b/safekeeper/src/wal_storage.rs @@ -574,6 +574,7 @@ impl Storage for PhysicalStorage { } self.pending_wal_truncation = false; + info!("truncated WAL to {}", end_pos); Ok(()) } From c92a36740bf0f3c478836bf103b8ffa232e1fb8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 27 Feb 2025 10:05:15 +0100 Subject: [PATCH 041/207] fix(ci): support PR-on-top-of-PR usecase again (#11013) ## Problem https://github.com/neondatabase/neon/pull/10841 broke CI on PRs that aren't based on main or a release branch but want to merge into another PR. ## Summary of changes Replace `run-kind=pr-main` with `run-kind=pr`, so that all PRs that aren't release PRs are treated equally. --- .github/workflows/_meta.yml | 6 +++--- .github/workflows/build_and_test.yml | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index ac09a51ae9..9454533fbb 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -19,7 +19,7 @@ on: description: "Tag of the last compute release" value: ${{ jobs.tags.outputs.compute }} run-kind: - description: "The kind of run we're currently in. Will be one of `pr-main`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`" + description: "The kind of run we're currently in. 
Will be one of `pr`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`" value: ${{ jobs.tags.outputs.run-kind }} permissions: {} @@ -51,10 +51,10 @@ jobs: || (inputs.github-event-name == 'push' && github.ref_name == 'release') && 'storage-release' || (inputs.github-event-name == 'push' && github.ref_name == 'release-compute') && 'compute-release' || (inputs.github-event-name == 'push' && github.ref_name == 'release-proxy') && 'proxy-release' - || (inputs.github-event-name == 'pull_request' && github.base_ref == 'main') && 'pr-main' || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release') && 'storage-rc-pr' || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr' || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr' + || (inputs.github-event-name == 'pull_request') && 'pr' || 'unknown' }} run: | @@ -81,7 +81,7 @@ jobs: compute-release) echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT ;; - pr-main|storage-rc-pr|compute-rc-pr|proxy-rc-pr) + pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr) BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT ;; diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1fd4f292e8..0dcf11d32f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -487,7 +487,7 @@ jobs: neon-image-arch: needs: [ check-permissions, build-build-tools-image, meta ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", 
"proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} strategy: matrix: arch: [ x64, arm64 ] @@ -537,7 +537,7 @@ jobs: neon-image: needs: [ neon-image-arch, meta ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -559,7 +559,7 @@ jobs: compute-node-image-arch: needs: [ check-permissions, build-build-tools-image, meta ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -651,7 +651,7 @@ jobs: compute-node-image: needs: [ compute-node-image-arch, meta ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -694,7 +694,7 @@ jobs: vm-compute-node-image: needs: [ check-permissions, meta, compute-node-image ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: [ self-hosted, large ] strategy: fail-fast: false @@ -775,7 +775,7 @@ jobs: # Ensure that we don't have bad versions. 
- name: Verify image versions shell: bash # ensure no set -e for better error messages - if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} run: | pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") @@ -823,12 +823,12 @@ jobs: - name: Test extension upgrade timeout-minutes: 20 - if: ${{ contains(fromJSON('["pr-main", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} env: TAG: >- ${{ false - || needs.meta.outputs.run-kind == 'pr-main' && needs.meta.outputs.build-tag + || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release }} TEST_EXTENSIONS_TAG: latest @@ -870,7 +870,7 @@ jobs: push-neon-image-dev: needs: [ meta, generate-image-maps, neon-image ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -888,7 +888,7 @@ jobs: push-compute-image-dev: needs: [ meta, generate-image-maps, vm-compute-node-image ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: 
./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -955,7 +955,7 @@ jobs: trigger-custom-extensions-build-and-wait: needs: [ check-permissions, meta ] - if: ${{ contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -1347,7 +1347,7 @@ jobs: || needs.check-codestyle-python.result == 'skipped' || needs.check-codestyle-rust.result == 'skipped' || needs.files-changed.result == 'skipped' - || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) - || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)) + || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) + || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)) || needs.test-images.result == 'skipped' - || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr-main", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) + || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) From f09843ef17003f015b7469ffc0437794997e2e45 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 27 Feb 2025 10:26:25 
+0100 Subject: [PATCH 042/207] refactor(pageserver): propagate RequestContext to layer downloads (#11001) For some reason the layer download API never fully got `RequestContext`-infected. This PR fixes that as a precursor to - https://github.com/neondatabase/neon/issues/6107 --- pageserver/compaction/src/compact_tiered.rs | 4 +- pageserver/compaction/src/interface.rs | 1 + pageserver/compaction/src/simulator.rs | 1 + pageserver/src/http/routes.rs | 18 +++++--- .../src/tenant/storage_layer/delta_layer.rs | 2 +- pageserver/src/tenant/storage_layer/layer.rs | 46 +++++++++---------- .../src/tenant/storage_layer/layer/tests.rs | 22 ++++++--- pageserver/src/tenant/timeline.rs | 21 +++++++-- pageserver/src/tenant/timeline/compaction.rs | 11 +++-- .../src/tenant/timeline/detach_ancestor.rs | 2 +- .../timeline/heatmap_layers_downloader.rs | 9 +++- 11 files changed, 84 insertions(+), 53 deletions(-) diff --git a/pageserver/compaction/src/compact_tiered.rs b/pageserver/compaction/src/compact_tiered.rs index 7779ffaf8b..02b11910ce 100644 --- a/pageserver/compaction/src/compact_tiered.rs +++ b/pageserver/compaction/src/compact_tiered.rs @@ -307,7 +307,7 @@ where let mut layer_ids: Vec = Vec::new(); for layer_id in &job.input_layers { let layer = &self.layers[layer_id.0].layer; - if let Some(dl) = self.executor.downcast_delta_layer(layer).await? { + if let Some(dl) = self.executor.downcast_delta_layer(layer, ctx).await? { deltas.push(dl.clone()); layer_ids.push(*layer_id); } @@ -536,7 +536,7 @@ where let mut deltas: Vec = Vec::new(); for layer_id in &job.input_layers { let l = &self.layers[layer_id.0]; - if let Some(dl) = self.executor.downcast_delta_layer(&l.layer).await? { + if let Some(dl) = self.executor.downcast_delta_layer(&l.layer, ctx).await? 
{ deltas.push(dl.clone()); } } diff --git a/pageserver/compaction/src/interface.rs b/pageserver/compaction/src/interface.rs index 8ed393a645..92723faeaf 100644 --- a/pageserver/compaction/src/interface.rs +++ b/pageserver/compaction/src/interface.rs @@ -55,6 +55,7 @@ pub trait CompactionJobExecutor { fn downcast_delta_layer( &self, layer: &Self::Layer, + ctx: &Self::RequestContext, ) -> impl Future>> + Send; // ---- diff --git a/pageserver/compaction/src/simulator.rs b/pageserver/compaction/src/simulator.rs index 673b80c313..341fceba6f 100644 --- a/pageserver/compaction/src/simulator.rs +++ b/pageserver/compaction/src/simulator.rs @@ -487,6 +487,7 @@ impl interface::CompactionJobExecutor for MockTimeline { async fn downcast_delta_layer( &self, layer: &MockLayer, + _ctx: &MockRequestContext, ) -> anyhow::Result>> { Ok(match layer { MockLayer::Delta(l) => Some(l.clone()), diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index dd5a24a41f..b738d22740 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -927,11 +927,10 @@ async fn get_lsn_by_timestamp_handler( let with_lease = parse_query_param(&request, "with_lease")?.unwrap_or(false); - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); - let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let result = timeline .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx) .await?; @@ -1000,10 +999,10 @@ async fn get_timestamp_of_lsn_handler( .with_context(|| format!("Invalid LSN: {lsn_str:?}")) .map_err(ApiError::BadRequest)?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let 
result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?; match result { @@ -1368,7 +1367,7 @@ async fn timeline_layer_scan_disposable_keys( }; let resident_layer = layer - .download_and_keep_resident() + .download_and_keep_resident(&ctx) .await .map_err(|err| match err { tenant::storage_layer::layer::DownloadError::TimelineShutdown @@ -1443,6 +1442,7 @@ async fn timeline_download_heatmap_layers_handler( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let max_concurrency = get_config(&request) .remote_storage_config @@ -1451,7 +1451,9 @@ async fn timeline_download_heatmap_layers_handler( .unwrap_or(DEFAULT_MAX_CONCURRENCY); let concurrency = std::cmp::min(max_concurrency, desired_concurrency); - timeline.start_heatmap_layers_download(concurrency).await?; + timeline + .start_heatmap_layers_download(concurrency, &ctx) + .await?; json_response(StatusCode::ACCEPTED, ()) } @@ -1490,8 +1492,9 @@ async fn layer_download_handler( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let downloaded = timeline - .download_layer(&layer_name) + .download_layer(&layer_name, &ctx) .await .map_err(|e| match e { tenant::storage_layer::layer::DownloadError::TimelineShutdown @@ -2389,7 +2392,8 @@ async fn timeline_download_remote_layers_handler_post( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - match timeline.spawn_download_all_remote_layers(body).await { + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + match timeline.spawn_download_all_remote_layers(body, &ctx).await { Ok(st) => json_response(StatusCode::ACCEPTED, st), Err(st) => json_response(StatusCode::CONFLICT, st), } diff --git 
a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index d9afdc2405..83ac6aab51 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -2083,7 +2083,7 @@ pub(crate) mod test { .await .unwrap(); - let new_layer = new_layer.download_and_keep_resident().await.unwrap(); + let new_layer = new_layer.download_and_keep_resident(ctx).await.unwrap(); new_layer .copy_delta_prefix(&mut writer, truncate_at, ctx) diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index ae06aca63b..bde7fbc1f9 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -324,16 +324,16 @@ impl Layer { reconstruct_data: &mut ValuesReconstructState, ctx: &RequestContext, ) -> Result<(), GetVectoredError> { - let downloaded = self - .0 - .get_or_maybe_download(true, Some(ctx)) - .await - .map_err(|err| match err { - DownloadError::TimelineShutdown | DownloadError::DownloadCancelled => { - GetVectoredError::Cancelled - } - other => GetVectoredError::Other(anyhow::anyhow!(other)), - })?; + let downloaded = + self.0 + .get_or_maybe_download(true, ctx) + .await + .map_err(|err| match err { + DownloadError::TimelineShutdown | DownloadError::DownloadCancelled => { + GetVectoredError::Cancelled + } + other => GetVectoredError::Other(anyhow::anyhow!(other)), + })?; let this = ResidentLayer { downloaded: downloaded.clone(), owner: self.clone(), @@ -356,8 +356,8 @@ impl Layer { /// Download the layer if evicted. /// /// Will not error when the layer is already downloaded. 
- pub(crate) async fn download(&self) -> Result<(), DownloadError> { - self.0.get_or_maybe_download(true, None).await?; + pub(crate) async fn download(&self, ctx: &RequestContext) -> Result<(), DownloadError> { + self.0.get_or_maybe_download(true, ctx).await?; Ok(()) } @@ -392,8 +392,11 @@ impl Layer { } /// Downloads if necessary and creates a guard, which will keep this layer from being evicted. - pub(crate) async fn download_and_keep_resident(&self) -> Result { - let downloaded = self.0.get_or_maybe_download(true, None).await?; + pub(crate) async fn download_and_keep_resident( + &self, + ctx: &RequestContext, + ) -> Result { + let downloaded = self.0.get_or_maybe_download(true, ctx).await?; Ok(ResidentLayer { downloaded, @@ -446,7 +449,7 @@ impl Layer { if verbose { // for now, unconditionally download everything, even if that might not be wanted. - let l = self.0.get_or_maybe_download(true, Some(ctx)).await?; + let l = self.0.get_or_maybe_download(true, ctx).await?; l.dump(&self.0, ctx).await? 
} @@ -945,7 +948,7 @@ impl LayerInner { async fn get_or_maybe_download( self: &Arc, allow_download: bool, - ctx: Option<&RequestContext>, + ctx: &RequestContext, ) -> Result, DownloadError> { let (weak, permit) = { // get_or_init_detached can: @@ -1035,21 +1038,14 @@ impl LayerInner { return Err(DownloadError::NotFile(ft)); } - if let Some(ctx) = ctx { - self.check_expected_download(ctx)?; - } + self.check_expected_download(ctx)?; if !allow_download { // this is only used from tests, but it is hard to test without the boolean return Err(DownloadError::DownloadRequired); } - let download_ctx = ctx - .map(|ctx| ctx.detached_child(TaskKind::LayerDownload, DownloadBehavior::Download)) - .unwrap_or(RequestContext::new( - TaskKind::LayerDownload, - DownloadBehavior::Download, - )); + let download_ctx = ctx.detached_child(TaskKind::LayerDownload, DownloadBehavior::Download); async move { tracing::info!(%reason, "downloading on-demand"); diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index 724150d27f..d43dfefdbc 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -178,7 +178,7 @@ async fn smoke_test() { // plain downloading is rarely needed layer - .download_and_keep_resident() + .download_and_keep_resident(&ctx) .instrument(download_span) .await .unwrap(); @@ -379,7 +379,7 @@ fn read_wins_pending_eviction() { // because no actual eviction happened, we get to just reinitialize the DownloadedLayer layer .0 - .get_or_maybe_download(false, None) + .get_or_maybe_download(false, &ctx) .instrument(download_span) .await .expect("should had reinitialized without downloading"); @@ -514,7 +514,7 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) { // because no actual eviction happened, we get to just reinitialize the DownloadedLayer layer .0 - .get_or_maybe_download(false, None) + .get_or_maybe_download(false, &ctx) 
.instrument(download_span) .await .expect("should had reinitialized without downloading"); @@ -642,6 +642,11 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { .await .unwrap(); + // This test does downloads + let ctx = RequestContextBuilder::extend(&ctx) + .download_behavior(DownloadBehavior::Download) + .build(); + let layer = { let mut layers = { let layers = timeline.layers.read().await; @@ -674,7 +679,7 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { // simulate a cancelled read which is cancelled before it gets to re-initialize let e = layer .0 - .get_or_maybe_download(false, None) + .get_or_maybe_download(false, &ctx) .await .unwrap_err(); assert!( @@ -698,7 +703,7 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { // failpoint is still enabled, but it is not hit let e = layer .0 - .get_or_maybe_download(false, None) + .get_or_maybe_download(false, &ctx) .await .unwrap_err(); assert!(matches!(e, DownloadError::DownloadRequired), "{e:?}"); @@ -722,6 +727,11 @@ async fn evict_and_wait_does_not_wait_for_download() { .await .unwrap(); + // This test does downloads + let ctx = RequestContextBuilder::extend(&ctx) + .download_behavior(DownloadBehavior::Download) + .build(); + let layer = { let mut layers = { let layers = timeline.layers.read().await; @@ -768,7 +778,7 @@ async fn evict_and_wait_does_not_wait_for_download() { let mut download = std::pin::pin!( layer .0 - .get_or_maybe_download(true, None) + .get_or_maybe_download(true, &ctx) .instrument(download_span) ); diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index dfa50d498c..3164cdbdd2 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2197,6 +2197,7 @@ impl Timeline { pub(crate) async fn download_layer( &self, layer_file_name: &LayerName, + ctx: &RequestContext, ) -> Result, super::storage_layer::layer::DownloadError> { let Some(layer) = self 
.find_layer(layer_file_name) @@ -2210,7 +2211,7 @@ impl Timeline { return Ok(None); }; - layer.download().await?; + layer.download(ctx).await?; Ok(Some(true)) } @@ -6210,6 +6211,7 @@ impl Timeline { pub(crate) async fn spawn_download_all_remote_layers( self: Arc, request: DownloadRemoteLayersTaskSpawnRequest, + ctx: &RequestContext, ) -> Result { use pageserver_api::models::DownloadRemoteLayersTaskState; @@ -6230,6 +6232,10 @@ impl Timeline { } let self_clone = Arc::clone(&self); + let task_ctx = ctx.detached_child( + TaskKind::DownloadAllRemoteLayers, + DownloadBehavior::Download, + ); let task_id = task_mgr::spawn( task_mgr::BACKGROUND_RUNTIME.handle(), task_mgr::TaskKind::DownloadAllRemoteLayers, @@ -6237,7 +6243,7 @@ impl Timeline { Some(self.timeline_id), "download all remote layers task", async move { - self_clone.download_all_remote_layers(request).await; + self_clone.download_all_remote_layers(request, &task_ctx).await; let mut status_guard = self_clone.download_all_remote_layers_task_info.write().unwrap(); match &mut *status_guard { None => { @@ -6272,6 +6278,7 @@ impl Timeline { async fn download_all_remote_layers( self: &Arc, request: DownloadRemoteLayersTaskSpawnRequest, + ctx: &RequestContext, ) { use pageserver_api::models::DownloadRemoteLayersTaskState; @@ -6328,9 +6335,10 @@ impl Timeline { let span = tracing::info_span!("download", layer = %next); + let ctx = ctx.attached_child(); js.spawn( async move { - let res = next.download().await; + let res = next.download(&ctx).await; (next, res) } .instrument(span), @@ -6920,6 +6928,7 @@ mod tests { use utils::lsn::Lsn; use super::HeatMapTimeline; + use crate::context::RequestContextBuilder; use crate::tenant::harness::{TenantHarness, test_img}; use crate::tenant::layer_map::LayerMap; use crate::tenant::storage_layer::{Layer, LayerName, LayerVisibilityHint}; @@ -7056,8 +7065,12 @@ mod tests { eprintln!("Downloading {layer} and re-generating heatmap"); + let ctx = &RequestContextBuilder::extend(&ctx) + 
.download_behavior(crate::context::DownloadBehavior::Download) + .build(); + let _resident = layer - .download_and_keep_resident() + .download_and_keep_resident(ctx) .instrument(tracing::info_span!( parent: None, "download_layer", diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 091bd583d7..3f2f1a6e5f 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1153,7 +1153,7 @@ impl Timeline { // - We do not run concurrently with other kinds of compaction, so the only layer map writes we race with are: // - GC, which at worst witnesses us "undelete" a layer that they just deleted. // - ingestion, which only inserts layers, therefore cannot collide with us. - let resident = layer.download_and_keep_resident().await?; + let resident = layer.download_and_keep_resident(ctx).await?; let keys_written = resident .filter(&self.shard_identity, &mut image_layer_writer, ctx) @@ -1381,14 +1381,14 @@ impl Timeline { let mut fully_compacted = true; - deltas_to_compact.push(first_level0_delta.download_and_keep_resident().await?); + deltas_to_compact.push(first_level0_delta.download_and_keep_resident(ctx).await?); for l in level0_deltas_iter { let lsn_range = &l.layer_desc().lsn_range; if lsn_range.start != prev_lsn_end { break; } - deltas_to_compact.push(l.download_and_keep_resident().await?); + deltas_to_compact.push(l.download_and_keep_resident(ctx).await?); deltas_to_compact_bytes += l.metadata().file_size; prev_lsn_end = lsn_range.end; @@ -2828,7 +2828,7 @@ impl Timeline { total_downloaded_size += layer.layer_desc().file_size; } total_layer_size += layer.layer_desc().file_size; - let resident_layer = layer.download_and_keep_resident().await?; + let resident_layer = layer.download_and_keep_resident(ctx).await?; downloaded_layers.push(resident_layer); } info!( @@ -3404,6 +3404,7 @@ impl CompactionJobExecutor for TimelineAdaptor { async fn downcast_delta_layer( 
&self, layer: &OwnArc, + ctx: &RequestContext, ) -> anyhow::Result> { // this is a lot more complex than a simple downcast... if layer.is_delta() { @@ -3411,7 +3412,7 @@ impl CompactionJobExecutor for TimelineAdaptor { let guard = self.timeline.layers.read().await; guard.get_from_desc(layer) }; - let result = l.download_and_keep_resident().await?; + let result = l.download_and_keep_resident(ctx).await?; Ok(Some(ResidentDeltaLayer(result))) } else { diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index 71bd196344..b08003d04a 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -629,7 +629,7 @@ async fn copy_lsn_prefix( .with_context(|| format!("prepare to copy lsn prefix of ancestors {layer}")) .map_err(Error::Prepare)?; - let resident = layer.download_and_keep_resident().await.map_err(|e| { + let resident = layer.download_and_keep_resident(ctx).await.map_err(|e| { if e.is_cancelled() { Error::ShuttingDown } else { diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 27243ba378..184c830464 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -10,6 +10,8 @@ use http_utils::error::ApiError; use tokio_util::sync::CancellationToken; use utils::sync::gate::Gate; +use crate::context::RequestContext; + use super::Timeline; // This status is not strictly necessary now, but gives us a nice place @@ -30,6 +32,7 @@ impl HeatmapLayersDownloader { fn new( timeline: Arc, concurrency: usize, + ctx: RequestContext, ) -> Result { let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?; @@ -63,6 +66,7 @@ impl HeatmapLayersDownloader { let stream = futures::stream::iter(heatmap.layers.into_iter().filter_map( |layer| { + let ctx = ctx.attached_child(); let tl = 
timeline.clone(); let dl_guard = match downloads_guard.enter() { Ok(g) => g, @@ -75,7 +79,7 @@ impl HeatmapLayersDownloader { Some(async move { let _dl_guard = dl_guard; - let res = tl.download_layer(&layer.name).await; + let res = tl.download_layer(&layer.name, &ctx).await; if let Err(err) = res { if !err.is_cancelled() { tracing::warn!(layer=%layer.name,"Failed to download heatmap layer: {err}") @@ -139,10 +143,11 @@ impl Timeline { pub(crate) async fn start_heatmap_layers_download( self: &Arc, concurrency: usize, + ctx: &RequestContext, ) -> Result<(), ApiError> { let mut locked = self.heatmap_layers_downloader.lock().unwrap(); if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) { - let dl = HeatmapLayersDownloader::new(self.clone(), concurrency)?; + let dl = HeatmapLayersDownloader::new(self.clone(), concurrency, ctx.attached_child())?; *locked = Some(dl); Ok(()) } else { From a22be5af72d4c44fcbb320371eaf0f430e008242 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 27 Feb 2025 10:40:40 +0100 Subject: [PATCH 043/207] Migrate the last crates to edition 2024 (#10998) Migrates the remaining crates to edition 2024. We like to stay on the latest edition if possible. There is no functional changes, however some code changes had to be done to accommodate the edition's breaking changes. Like the previous migration PRs, this is comprised of three commits: * the first does the edition update and makes `cargo check`/`cargo clippy` pass. we had to update bindgen to make its output [satisfy the requirements of edition 2024](https://doc.rust-lang.org/edition-guide/rust-2024/unsafe-extern.html) * the second commit does a `cargo fmt` for the new style edition. * the third commit reorders imports as a one-off change. As before, it is entirely optional. 
Part of #10918 --- Cargo.lock | 16 +++-- Cargo.toml | 4 +- control_plane/src/background_process.rs | 2 +- control_plane/src/bin/neon_local.rs | 43 +++++++------ control_plane/src/broker.rs | 1 - control_plane/src/endpoint.rs | 30 ++++------ control_plane/src/local_env.rs | 30 ++++------ control_plane/src/pageserver.rs | 22 +++---- control_plane/src/postgresql_conf.rs | 5 +- control_plane/src/safekeeper.rs | 9 +-- control_plane/src/storage_controller.rs | 58 +++++++++--------- control_plane/storcon_cli/src/main.rs | 51 ++++++++-------- libs/consumption_metrics/Cargo.toml | 2 +- libs/desim/src/chan.rs | 3 +- libs/desim/src/executor.rs | 12 ++-- libs/desim/src/network.rs | 31 ++++------ libs/desim/src/node_os.rs | 9 +-- libs/desim/src/options.rs | 3 +- libs/desim/src/proto.rs | 3 +- libs/desim/src/time.rs | 14 ++--- libs/desim/src/world.rs | 27 ++++----- libs/desim/tests/reliable_copy_test.rs | 7 ++- libs/http-utils/src/endpoint.rs | 60 ++++++++++--------- libs/http-utils/src/error.rs | 6 +- libs/http-utils/src/failpoints.rs | 7 +-- libs/http-utils/src/json.rs | 2 +- libs/http-utils/src/lib.rs | 2 +- libs/http-utils/src/pprof.rs | 14 ++--- libs/http-utils/src/request.rs | 11 ++-- libs/metrics/src/hll.rs | 32 +++++----- libs/metrics/src/launch_timestamp.rs | 3 +- libs/metrics/src/lib.rs | 42 +++++-------- libs/postgres_backend/src/lib.rs | 28 +++++---- libs/postgres_backend/tests/simple_select.rs | 5 +- libs/postgres_connection/src/lib.rs | 11 ++-- libs/postgres_ffi/benches/waldecoder.rs | 2 +- libs/postgres_ffi/build.rs | 2 +- libs/postgres_ffi/src/lib.rs | 32 +++++----- libs/postgres_ffi/src/pg_constants.rs | 3 +- libs/postgres_ffi/src/walrecord.rs | 27 +++++---- .../wal_craft/src/bin/wal_craft.rs | 6 +- libs/postgres_ffi/wal_craft/src/lib.rs | 19 +++--- libs/pq_proto/src/framed.rs | 7 +-- libs/pq_proto/src/lib.rs | 13 ++-- libs/tenant_size_model/src/calculation.rs | 6 +- libs/tenant_size_model/src/svg.rs | 5 +- libs/tracing-utils/src/http.rs | 4 +- 
libs/tracing-utils/src/lib.rs | 4 +- libs/utils/benches/benchmarks.rs | 2 +- libs/utils/src/auth.rs | 24 +++++--- libs/utils/src/backoff.rs | 4 +- libs/utils/src/bin_ser.rs | 12 ++-- libs/utils/src/circuit_breaker.rs | 6 +- libs/utils/src/completion.rs | 3 +- libs/utils/src/crashsafe.rs | 8 +-- libs/utils/src/env.rs | 3 +- libs/utils/src/failpoint_support.rs | 8 ++- libs/utils/src/fs_ext.rs | 3 +- libs/utils/src/fs_ext/rename_noreplace.rs | 3 +- libs/utils/src/generation.rs | 4 +- libs/utils/src/guard_arc_swap.rs | 3 +- libs/utils/src/id.rs | 8 +-- libs/utils/src/leaky_bucket.rs | 13 ++-- libs/utils/src/linux_socket_ioctl.rs | 28 +++++---- libs/utils/src/lock_file.rs | 13 ++-- libs/utils/src/logging.rs | 7 ++- libs/utils/src/lsn.rs | 9 +-- libs/utils/src/measured_stream.rs | 3 +- libs/utils/src/pageserver_feedback.rs | 2 +- libs/utils/src/postgres_client.rs | 2 +- libs/utils/src/rate_limit.rs | 3 +- libs/utils/src/sentry_init.rs | 2 +- libs/utils/src/seqwait.rs | 10 ++-- libs/utils/src/serde_percent.rs | 6 +- libs/utils/src/shard.rs | 9 +-- libs/utils/src/signals.rs | 4 +- libs/utils/src/simple_rcu.rs | 6 +- libs/utils/src/sync/gate.rs | 10 +--- libs/utils/src/sync/heavier_once_cell.rs | 16 +++-- libs/utils/src/sync/spsc_fold.rs | 3 +- libs/utils/src/tcp_listener.rs | 9 ++- libs/utils/src/tracing_span_assert.rs | 10 ++-- libs/utils/src/try_rcu.rs | 6 +- libs/utils/src/vec_map.rs | 7 ++- libs/utils/src/zstd.rs | 17 ++---- libs/utils/tests/bin_ser_test.rs | 3 +- .../benches/bench_interpret_wal.rs | 28 +++++---- libs/wal_decoder/src/decoder.rs | 5 +- libs/wal_decoder/src/serialized_batch.rs | 17 +++--- libs/wal_decoder/src/wire_format.rs | 7 +-- libs/walproposer/build.rs | 6 +- libs/walproposer/src/api_bindings.rs | 27 +++------ libs/walproposer/src/walproposer.rs | 35 +++++------ pageserver/client/src/mgmt_api.rs | 16 +++-- pageserver/client/src/page_service.rs | 21 +++---- .../src/bin/compaction-simulator.rs | 11 ++-- pageserver/compaction/src/compact_tiered.rs | 22 
+++---- pageserver/compaction/src/helpers.rs | 16 ++--- pageserver/compaction/src/identify_levels.rs | 15 +++-- pageserver/compaction/src/interface.rs | 7 ++- pageserver/compaction/src/simulator.rs | 15 ++--- pageserver/compaction/src/simulator/draw.rs | 16 ++--- pageserver/ctl/src/draw_timeline_dir.rs | 16 ++--- pageserver/ctl/src/key.rs | 26 +++++--- pageserver/ctl/src/layer_map_analyzer.rs | 22 +++---- pageserver/ctl/src/layers.rs | 5 +- pageserver/ctl/src/main.rs | 36 ++++++----- pageserver/pagebench/src/cmd/aux_files.rs | 8 +-- pageserver/pagebench/src/cmd/basebackup.rs | 24 ++++---- .../pagebench/src/cmd/getpage_latest_lsn.rs | 28 ++++----- .../src/cmd/ondemand_download_churn.rs | 22 +++---- .../cmd/trigger_initial_size_calculation.rs | 3 +- safekeeper/client/src/mgmt_api.rs | 9 ++- storage_controller/client/src/control_api.rs | 3 +- storage_controller/src/service.rs | 2 +- 115 files changed, 723 insertions(+), 769 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7d11f2b7fc..293ed465ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -984,9 +984,9 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.70.1" +version = "0.71.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" +checksum = "5f58bf3d7db68cfbac37cfc485a8d711e87e064c3d0fe0435b92f7a407f9d6b3" dependencies = [ "bitflags 2.8.0", "cexpr", @@ -997,7 +997,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "rustc-hash", + "rustc-hash 2.1.1", "shlex", "syn 2.0.90", ] @@ -3537,7 +3537,7 @@ dependencies = [ "measured-derive", "memchr", "parking_lot 0.12.1", - "rustc-hash", + "rustc-hash 1.1.0", "ryu", ] @@ -5012,7 +5012,7 @@ dependencies = [ "reqwest-tracing", "rsa", "rstest", - "rustc-hash", + "rustc-hash 1.1.0", "rustls 0.23.18", "rustls-native-certs 0.8.0", "rustls-pemfile 2.1.1", @@ -5630,6 +5630,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +[[package]] +name = "rustc-hash" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" + [[package]] name = "rustc_version" version = "0.4.0" diff --git a/Cargo.toml b/Cargo.toml index 223ff4249e..ff45d46a47 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,7 +43,7 @@ members = [ ] [workspace.package] -edition = "2021" +edition = "2024" license = "Apache-2.0" ## All dependency versions, used in the project @@ -70,7 +70,7 @@ aws-types = "1.3" axum = { version = "0.8.1", features = ["ws"] } base64 = "0.13.0" bincode = "1.3" -bindgen = "0.70" +bindgen = "0.71" bit_field = "0.10.2" bstr = "1.0" byteorder = "1.4" diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index c668e68402..1eac4f7ff0 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -25,7 +25,7 @@ use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use nix::errno::Errno; use nix::fcntl::{FcntlArg, FdFlag}; -use nix::sys::signal::{kill, Signal}; +use nix::sys::signal::{Signal, kill}; use nix::unistd::Pid; use utils::pid_file::{self, PidFileRead}; diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 7d908ccae9..f258025428 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -5,7 +5,16 @@ //! easier to work with locally. The python tests in `test_runner` //! rely on `neon_local` to set up the environment for each test. //! 
-use anyhow::{anyhow, bail, Context, Result}; +use std::borrow::Cow; +use std::collections::{BTreeSet, HashMap}; +use std::fs::File; +use std::os::fd::AsRawFd; +use std::path::PathBuf; +use std::process::exit; +use std::str::FromStr; +use std::time::Duration; + +use anyhow::{Context, Result, anyhow, bail}; use clap::Parser; use compute_api::spec::ComputeMode; use control_plane::endpoint::ComputeControlPlane; @@ -19,7 +28,7 @@ use control_plane::storage_controller::{ NeonStorageControllerStartArgs, NeonStorageControllerStopArgs, StorageController, }; use control_plane::{broker, local_env}; -use nix::fcntl::{flock, FlockArg}; +use nix::fcntl::{FlockArg, flock}; use pageserver_api::config::{ DEFAULT_HTTP_LISTEN_PORT as DEFAULT_PAGESERVER_HTTP_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_PAGESERVER_PG_PORT, @@ -35,23 +44,13 @@ use safekeeper_api::{ DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, }; -use std::borrow::Cow; -use std::collections::{BTreeSet, HashMap}; -use std::fs::File; -use std::os::fd::AsRawFd; -use std::path::PathBuf; -use std::process::exit; -use std::str::FromStr; -use std::time::Duration; use storage_broker::DEFAULT_LISTEN_ADDR as DEFAULT_BROKER_ADDR; use tokio::task::JoinSet; use url::Host; -use utils::{ - auth::{Claims, Scope}, - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, - lsn::Lsn, - project_git_version, -}; +use utils::auth::{Claims, Scope}; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; +use utils::lsn::Lsn; +use utils::project_git_version; // Default id of a safekeeper node, if not specified on the command line. const DEFAULT_SAFEKEEPER_ID: NodeId = NodeId(1); @@ -921,7 +920,9 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result { let init_conf: NeonLocalInitConf = if let Some(config_path) = &args.config { // User (likely the Python test suite) provided a description of the environment. 
if args.num_pageservers.is_some() { - bail!("Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead"); + bail!( + "Cannot specify both --num-pageservers and --config, use key `pageservers` in the --config file instead" + ); } // load and parse the file let contents = std::fs::read_to_string(config_path).with_context(|| { @@ -1315,10 +1316,14 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res match (mode, args.hot_standby) { (ComputeMode::Static(_), true) => { - bail!("Cannot start a node in hot standby mode when it is already configured as a static replica") + bail!( + "Cannot start a node in hot standby mode when it is already configured as a static replica" + ) } (ComputeMode::Primary, true) => { - bail!("Cannot start a node as a hot standby replica, it is already configured as primary node") + bail!( + "Cannot start a node as a hot standby replica, it is already configured as primary node" + ) } _ => {} } diff --git a/control_plane/src/broker.rs b/control_plane/src/broker.rs index c8ac5d8981..1b507bb384 100644 --- a/control_plane/src/broker.rs +++ b/control_plane/src/broker.rs @@ -8,7 +8,6 @@ use std::time::Duration; use anyhow::Context; - use camino::Utf8PathBuf; use crate::{background_process, local_env}; diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 407578abb8..50ccca36fe 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -37,27 +37,20 @@ //! ``` //! 
use std::collections::BTreeMap; -use std::net::IpAddr; -use std::net::Ipv4Addr; -use std::net::SocketAddr; -use std::net::TcpStream; +use std::net::{IpAddr, Ipv4Addr, SocketAddr, TcpStream}; use std::path::PathBuf; use std::process::Command; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::SystemTime; -use std::time::UNIX_EPOCH; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{Context, Result, anyhow, bail}; use compute_api::requests::ConfigurationRequest; -use compute_api::responses::ComputeCtlConfig; -use compute_api::spec::Database; -use compute_api::spec::PgIdent; -use compute_api::spec::RemoteExtSpec; -use compute_api::spec::Role; -use nix::sys::signal::kill; -use nix::sys::signal::Signal; +use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse}; +use compute_api::spec::{ + Cluster, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, RemoteExtSpec, Role, +}; +use nix::sys::signal::{Signal, kill}; use pageserver_api::shard::ShardStripeSize; use reqwest::header::CONTENT_TYPE; use serde::{Deserialize, Serialize}; @@ -69,9 +62,6 @@ use crate::local_env::LocalEnv; use crate::postgresql_conf::PostgresConf; use crate::storage_controller::StorageController; -use compute_api::responses::{ComputeStatus, ComputeStatusResponse}; -use compute_api::spec::{Cluster, ComputeFeature, ComputeMode, ComputeSpec}; - // contents of a endpoint.json file #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] pub struct EndpointConf { @@ -237,7 +227,9 @@ impl ComputeControlPlane { }); if let Some((key, _)) = duplicates.next() { - bail!("attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. 
please don't do this, it is not supported."); + bail!( + "attempting to create a duplicate primary endpoint on tenant {tenant_id}, timeline {timeline_id}: endpoint {key:?} exists already. please don't do this, it is not supported." + ); } } Ok(()) diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 2fe4cd5202..f4026efbbf 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -3,28 +3,22 @@ //! Now it also provides init method which acts like a stub for proper installation //! script which will use local paths. -use anyhow::{bail, Context}; +use std::collections::HashMap; +use std::net::{IpAddr, Ipv4Addr, SocketAddr}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; +use std::time::Duration; +use std::{env, fs}; +use anyhow::{Context, bail}; use clap::ValueEnum; use postgres_backend::AuthType; use reqwest::Url; use serde::{Deserialize, Serialize}; -use std::collections::HashMap; -use std::env; -use std::fs; -use std::net::IpAddr; -use std::net::Ipv4Addr; -use std::net::SocketAddr; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; -use std::time::Duration; -use utils::{ - auth::{encode_from_key_file, Claims}, - id::{NodeId, TenantId, TenantTimelineId, TimelineId}, -}; +use utils::auth::{Claims, encode_from_key_file}; +use utils::id::{NodeId, TenantId, TenantTimelineId, TimelineId}; -use crate::pageserver::PageServerNode; -use crate::pageserver::PAGESERVER_REMOTE_STORAGE_DIR; +use crate::pageserver::{PAGESERVER_REMOTE_STORAGE_DIR, PageServerNode}; use crate::safekeeper::SafekeeperNode; pub const DEFAULT_PG_VERSION: u32 = 16; @@ -465,7 +459,9 @@ impl LocalEnv { if old_timeline_id == &timeline_id { Ok(()) } else { - bail!("branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}"); + bail!( + "branch '{branch_name}' is already mapped to timeline {old_timeline_id}, cannot map to another timeline {timeline_id}" + 
); } } else { existing_values.push((tenant_id, timeline_id)); diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 2bf89b7bfa..39656bdbbe 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -7,7 +7,6 @@ //! ``` //! use std::collections::HashMap; - use std::io; use std::io::Write; use std::num::NonZeroU64; @@ -15,22 +14,19 @@ use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; -use anyhow::{bail, Context}; +use anyhow::{Context, bail}; use camino::Utf8PathBuf; use pageserver_api::models::{self, TenantInfo, TimelineInfo}; use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use postgres_backend::AuthType; -use postgres_connection::{parse_host_port, PgConnectionConfig}; +use postgres_connection::{PgConnectionConfig, parse_host_port}; use utils::auth::{Claims, Scope}; -use utils::id::NodeId; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; -use crate::local_env::{NeonLocalInitPageserverConf, PageServerConf}; -use crate::{background_process, local_env::LocalEnv}; +use crate::background_process; +use crate::local_env::{LocalEnv, NeonLocalInitPageserverConf, PageServerConf}; /// Directory within .neon which will be used by default for LocalFs remote storage. 
pub const PAGESERVER_REMOTE_STORAGE_DIR: &str = "local_fs_remote_storage/pageserver"; @@ -81,7 +77,11 @@ impl PageServerNode { &self, conf: NeonLocalInitPageserverConf, ) -> anyhow::Result { - assert_eq!(&PageServerConf::from(&conf), &self.conf, "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully"); + assert_eq!( + &PageServerConf::from(&conf), + &self.conf, + "during neon_local init, we derive the runtime state of ps conf (self.conf) from the --config flag fully" + ); // TODO(christian): instead of what we do here, create a pageserver_api::config::ConfigToml (PR #7656) diff --git a/control_plane/src/postgresql_conf.rs b/control_plane/src/postgresql_conf.rs index 5aee12dc97..a824af9490 100644 --- a/control_plane/src/postgresql_conf.rs +++ b/control_plane/src/postgresql_conf.rs @@ -1,3 +1,6 @@ +use std::collections::HashMap; +use std::fmt; + /// /// Module for parsing postgresql.conf file. /// @@ -6,8 +9,6 @@ /// funny stuff like include-directives or funny escaping. 
use once_cell::sync::Lazy; use regex::Regex; -use std::collections::HashMap; -use std::fmt; /// In-memory representation of a postgresql.conf file #[derive(Default, Debug)] diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs index ce7751fb14..70915d5aaf 100644 --- a/control_plane/src/safekeeper.rs +++ b/control_plane/src/safekeeper.rs @@ -14,18 +14,15 @@ use std::{io, result}; use anyhow::Context; use camino::Utf8PathBuf; +use http_utils::error::HttpErrorBody; use postgres_connection::PgConnectionConfig; use reqwest::{IntoUrl, Method}; use thiserror::Error; - -use http_utils::error::HttpErrorBody; use utils::auth::{Claims, Scope}; use utils::id::NodeId; -use crate::{ - background_process, - local_env::{LocalEnv, SafekeeperConf}, -}; +use crate::background_process; +use crate::local_env::{LocalEnv, SafekeeperConf}; #[derive(Error, Debug)] pub enum SafekeeperHttpError { diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 0fadb9c5fe..16e12f4e02 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -1,44 +1,39 @@ -use crate::{ - background_process, - local_env::{LocalEnv, NeonStorageControllerConf}, -}; +use std::ffi::OsStr; +use std::fs; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::process::ExitStatus; +use std::str::FromStr; +use std::sync::OnceLock; +use std::time::{Duration, Instant}; + use camino::{Utf8Path, Utf8PathBuf}; use hyper0::Uri; use nix::unistd::Pid; -use pageserver_api::{ - controller_api::{ - NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest, - TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest, - TenantShardMigrateResponse, - }, - models::{ - TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo, - }, - shard::{ShardStripeSize, TenantShardId}, +use pageserver_api::controller_api::{ + NodeConfigureRequest, NodeDescribeResponse, 
NodeRegisterRequest, TenantCreateRequest, + TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest, + TenantShardMigrateResponse, }; +use pageserver_api::models::{ + TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo, +}; +use pageserver_api::shard::{ShardStripeSize, TenantShardId}; use pageserver_client::mgmt_api::ResponseErrorMessageExt; use postgres_backend::AuthType; use reqwest::Method; -use serde::{de::DeserializeOwned, Deserialize, Serialize}; -use std::{ - ffi::OsStr, - fs, - net::SocketAddr, - path::PathBuf, - process::ExitStatus, - str::FromStr, - sync::OnceLock, - time::{Duration, Instant}, -}; +use serde::de::DeserializeOwned; +use serde::{Deserialize, Serialize}; use tokio::process::Command; use tracing::instrument; use url::Url; -use utils::{ - auth::{encode_from_key_file, Claims, Scope}, - id::{NodeId, TenantId}, -}; +use utils::auth::{Claims, Scope, encode_from_key_file}; +use utils::id::{NodeId, TenantId}; use whoami::username; +use crate::background_process; +use crate::local_env::{LocalEnv, NeonStorageControllerConf}; + pub struct StorageController { env: LocalEnv, private_key: Option>, @@ -96,7 +91,8 @@ pub struct AttachHookRequest { #[derive(Serialize, Deserialize)] pub struct AttachHookResponse { - pub gen: Option, + #[serde(rename = "gen")] + pub generation: Option, } #[derive(Serialize, Deserialize)] @@ -779,7 +775,7 @@ impl StorageController { ) .await?; - Ok(response.gen) + Ok(response.generation) } #[instrument(skip(self))] diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 40b86e4110..2e2c22c791 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -1,34 +1,27 @@ -use futures::StreamExt; -use std::{ - collections::{HashMap, HashSet}, - str::FromStr, - time::Duration, -}; +use std::collections::{HashMap, HashSet}; +use std::str::FromStr; +use std::time::Duration; use clap::{Parser, Subcommand}; -use 
pageserver_api::{ - controller_api::{ - AvailabilityZone, NodeAvailabilityWrapper, NodeDescribeResponse, NodeShardResponse, - SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy, - ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, - TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, - }, - models::{ - EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, - ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, - TenantShardSplitRequest, TenantShardSplitResponse, - }, - shard::{ShardStripeSize, TenantShardId}, +use futures::StreamExt; +use pageserver_api::controller_api::{ + AvailabilityZone, NodeAvailabilityWrapper, NodeConfigureRequest, NodeDescribeResponse, + NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, PlacementPolicy, + SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy, + ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, + TenantDescribeResponse, TenantPolicyRequest, TenantShardMigrateRequest, + TenantShardMigrateResponse, }; +use pageserver_api::models::{ + EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters, + TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest, + TenantShardSplitResponse, +}; +use pageserver_api::shard::{ShardStripeSize, TenantShardId}; use pageserver_client::mgmt_api::{self}; use reqwest::{Method, StatusCode, Url}; -use utils::id::{NodeId, TenantId, TimelineId}; - -use pageserver_api::controller_api::{ - NodeConfigureRequest, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy, - TenantShardMigrateRequest, TenantShardMigrateResponse, -}; use storage_controller_client::control_api::Client; +use utils::id::{NodeId, TenantId, TimelineId}; #[derive(Subcommand, Debug)] enum Command { @@ -921,7 +914,9 @@ async fn main() -> anyhow::Result<()> { } 
Command::TenantDrop { tenant_id, unclean } => { if !unclean { - anyhow::bail!("This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed.") + anyhow::bail!( + "This command is not a tenant deletion, and uncleanly drops all controller state for the tenant. If you know what you're doing, add `--unclean` to proceed." + ) } storcon_client .dispatch::<(), ()>( @@ -933,7 +928,9 @@ async fn main() -> anyhow::Result<()> { } Command::NodeDrop { node_id, unclean } => { if !unclean { - anyhow::bail!("This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed.") + anyhow::bail!( + "This command is not a clean node decommission, and uncleanly drops all controller state for the node, without checking if any tenants still refer to it. If you know what you're doing, add `--unclean` to proceed." 
+ ) } storcon_client .dispatch::<(), ()>(Method::POST, format!("debug/v1/node/{node_id}/drop"), None) diff --git a/libs/consumption_metrics/Cargo.toml b/libs/consumption_metrics/Cargo.toml index 0e517e3856..77f130950e 100644 --- a/libs/consumption_metrics/Cargo.toml +++ b/libs/consumption_metrics/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "consumption_metrics" version = "0.1.0" -edition = "2021" +edition = "2024" license = "Apache-2.0" [dependencies] diff --git a/libs/desim/src/chan.rs b/libs/desim/src/chan.rs index 6661d59871..8882cd3b56 100644 --- a/libs/desim/src/chan.rs +++ b/libs/desim/src/chan.rs @@ -1,4 +1,5 @@ -use std::{collections::VecDeque, sync::Arc}; +use std::collections::VecDeque; +use std::sync::Arc; use parking_lot::{Mutex, MutexGuard}; diff --git a/libs/desim/src/executor.rs b/libs/desim/src/executor.rs index 9d44bd7741..df8b071c06 100644 --- a/libs/desim/src/executor.rs +++ b/libs/desim/src/executor.rs @@ -1,11 +1,7 @@ -use std::{ - panic::AssertUnwindSafe, - sync::{ - atomic::{AtomicBool, AtomicU32, AtomicU8, Ordering}, - mpsc, Arc, OnceLock, - }, - thread::JoinHandle, -}; +use std::panic::AssertUnwindSafe; +use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU32, Ordering}; +use std::sync::{Arc, OnceLock, mpsc}; +use std::thread::JoinHandle; use tracing::{debug, error, trace}; diff --git a/libs/desim/src/network.rs b/libs/desim/src/network.rs index e15a714daa..cf096dba80 100644 --- a/libs/desim/src/network.rs +++ b/libs/desim/src/network.rs @@ -1,26 +1,19 @@ -use std::{ - cmp::Ordering, - collections::{BinaryHeap, VecDeque}, - fmt::{self, Debug}, - ops::DerefMut, - sync::{mpsc, Arc}, -}; +use std::cmp::Ordering; +use std::collections::{BinaryHeap, VecDeque}; +use std::fmt::{self, Debug}; +use std::ops::DerefMut; +use std::sync::{Arc, mpsc}; -use parking_lot::{ - lock_api::{MappedMutexGuard, MutexGuard}, - Mutex, RawMutex, -}; +use parking_lot::lock_api::{MappedMutexGuard, MutexGuard}; +use parking_lot::{Mutex, RawMutex}; use 
rand::rngs::StdRng; use tracing::debug; -use crate::{ - executor::{self, ThreadContext}, - options::NetworkOptions, - proto::NetEvent, - proto::NodeEvent, -}; - -use super::{chan::Chan, proto::AnyMessage}; +use super::chan::Chan; +use super::proto::AnyMessage; +use crate::executor::{self, ThreadContext}; +use crate::options::NetworkOptions; +use crate::proto::{NetEvent, NodeEvent}; pub struct NetworkTask { options: Arc, diff --git a/libs/desim/src/node_os.rs b/libs/desim/src/node_os.rs index 7744a9f5e1..e0cde7b284 100644 --- a/libs/desim/src/node_os.rs +++ b/libs/desim/src/node_os.rs @@ -2,14 +2,11 @@ use std::sync::Arc; use rand::Rng; +use super::chan::Chan; +use super::network::TCP; +use super::world::{Node, NodeId, World}; use crate::proto::NodeEvent; -use super::{ - chan::Chan, - network::TCP, - world::{Node, NodeId, World}, -}; - /// Abstraction with all functions (aka syscalls) available to the node. #[derive(Clone)] pub struct NodeOs { diff --git a/libs/desim/src/options.rs b/libs/desim/src/options.rs index 5da7c2c482..9b1a42fd28 100644 --- a/libs/desim/src/options.rs +++ b/libs/desim/src/options.rs @@ -1,4 +1,5 @@ -use rand::{rngs::StdRng, Rng}; +use rand::Rng; +use rand::rngs::StdRng; /// Describes random delays and failures. Delay will be uniformly distributed in [min, max]. /// Connection failure will occur with the probablity fail_prob. diff --git a/libs/desim/src/proto.rs b/libs/desim/src/proto.rs index 92a7e8a27d..31bc29e6a6 100644 --- a/libs/desim/src/proto.rs +++ b/libs/desim/src/proto.rs @@ -3,7 +3,8 @@ use std::fmt::Debug; use bytes::Bytes; use utils::lsn::Lsn; -use crate::{network::TCP, world::NodeId}; +use crate::network::TCP; +use crate::world::NodeId; /// Internal node events. 
#[derive(Debug)] diff --git a/libs/desim/src/time.rs b/libs/desim/src/time.rs index 7ce605bda8..350d182cc3 100644 --- a/libs/desim/src/time.rs +++ b/libs/desim/src/time.rs @@ -1,12 +1,8 @@ -use std::{ - cmp::Ordering, - collections::BinaryHeap, - ops::DerefMut, - sync::{ - atomic::{AtomicU32, AtomicU64}, - Arc, - }, -}; +use std::cmp::Ordering; +use std::collections::BinaryHeap; +use std::ops::DerefMut; +use std::sync::Arc; +use std::sync::atomic::{AtomicU32, AtomicU64}; use parking_lot::Mutex; use tracing::trace; diff --git a/libs/desim/src/world.rs b/libs/desim/src/world.rs index 7d60be04b5..576ba89cd7 100644 --- a/libs/desim/src/world.rs +++ b/libs/desim/src/world.rs @@ -1,19 +1,18 @@ +use std::ops::DerefMut; +use std::sync::{Arc, mpsc}; + use parking_lot::Mutex; -use rand::{rngs::StdRng, SeedableRng}; -use std::{ - ops::DerefMut, - sync::{mpsc, Arc}, -}; +use rand::SeedableRng; +use rand::rngs::StdRng; -use crate::{ - executor::{ExternalHandle, Runtime}, - network::NetworkTask, - options::NetworkOptions, - proto::{NodeEvent, SimEvent}, - time::Timing, -}; - -use super::{chan::Chan, network::TCP, node_os::NodeOs}; +use super::chan::Chan; +use super::network::TCP; +use super::node_os::NodeOs; +use crate::executor::{ExternalHandle, Runtime}; +use crate::network::NetworkTask; +use crate::options::NetworkOptions; +use crate::proto::{NodeEvent, SimEvent}; +use crate::time::Timing; pub type NodeId = u32; diff --git a/libs/desim/tests/reliable_copy_test.rs b/libs/desim/tests/reliable_copy_test.rs index cf7bff8f5a..1ddf9844de 100644 --- a/libs/desim/tests/reliable_copy_test.rs +++ b/libs/desim/tests/reliable_copy_test.rs @@ -1,14 +1,15 @@ //! Simple test to verify that simulator is working. 
#[cfg(test)] mod reliable_copy_test { + use std::sync::Arc; + use anyhow::Result; use desim::executor::{self, PollSome}; + use desim::node_os::NodeOs; use desim::options::{Delay, NetworkOptions}; - use desim::proto::{NetEvent, NodeEvent, ReplCell}; + use desim::proto::{AnyMessage, NetEvent, NodeEvent, ReplCell}; use desim::world::{NodeId, World}; - use desim::{node_os::NodeOs, proto::AnyMessage}; use parking_lot::Mutex; - use std::sync::Arc; use tracing::info; /// Disk storage trait and implementation. diff --git a/libs/http-utils/src/endpoint.rs b/libs/http-utils/src/endpoint.rs index be97b341d1..6128113580 100644 --- a/libs/http-utils/src/endpoint.rs +++ b/libs/http-utils/src/endpoint.rs @@ -1,30 +1,30 @@ -use crate::error::{api_error_handler, route_error_handler, ApiError}; -use crate::pprof; -use crate::request::{get_query_param, parse_query_param}; -use ::pprof::protos::Message as _; -use ::pprof::ProfilerGuardBuilder; -use anyhow::{anyhow, Context}; -use bytes::{Bytes, BytesMut}; -use hyper::header::{HeaderName, AUTHORIZATION, CONTENT_DISPOSITION}; -use hyper::http::HeaderValue; -use hyper::Method; -use hyper::{header::CONTENT_TYPE, Body, Request, Response}; -use metrics::{register_int_counter, Encoder, IntCounter, TextEncoder}; -use once_cell::sync::Lazy; -use regex::Regex; -use routerify::ext::RequestExt; -use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; -use tokio::sync::{mpsc, Mutex, Notify}; -use tokio_stream::wrappers::ReceiverStream; -use tokio_util::io::ReaderStream; -use tracing::{debug, info, info_span, warn, Instrument}; -use utils::auth::{AuthError, Claims, SwappableJwtAuth}; - use std::future::Future; use std::io::Write as _; use std::str::FromStr; use std::time::Duration; +use ::pprof::ProfilerGuardBuilder; +use ::pprof::protos::Message as _; +use anyhow::{Context, anyhow}; +use bytes::{Bytes, BytesMut}; +use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName}; +use hyper::http::HeaderValue; +use 
hyper::{Body, Method, Request, Response}; +use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter}; +use once_cell::sync::Lazy; +use regex::Regex; +use routerify::ext::RequestExt; +use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; +use tokio::sync::{Mutex, Notify, mpsc}; +use tokio_stream::wrappers::ReceiverStream; +use tokio_util::io::ReaderStream; +use tracing::{Instrument, debug, info, info_span, warn}; +use utils::auth::{AuthError, Claims, SwappableJwtAuth}; + +use crate::error::{ApiError, api_error_handler, route_error_handler}; +use crate::pprof; +use crate::request::{get_query_param, parse_query_param}; + static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { register_int_counter!( "libmetrics_metric_handler_requests_total", @@ -375,7 +375,7 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A Err(_) => { return Err(ApiError::Conflict( "profiler already running (use ?force=true to cancel it)".into(), - )) + )); } } tokio::time::sleep(Duration::from_millis(1)).await; // don't busy-wait @@ -539,8 +539,8 @@ pub async fn profile_heap_handler(req: Request) -> Result, } } -pub fn add_request_id_middleware( -) -> Middleware { +pub fn add_request_id_middleware() +-> Middleware { Middleware::pre(move |req| async move { let request_id = match req.headers().get(&X_REQUEST_ID_HEADER) { Some(request_id) => request_id @@ -664,7 +664,7 @@ pub fn auth_middleware( None => { return Err(ApiError::Unauthorized( "missing authorization header".to_string(), - )) + )); } } } @@ -717,12 +717,14 @@ pub fn check_permission_with( #[cfg(test)] mod tests { - use super::*; - use hyper::service::Service; - use routerify::RequestServiceBuilder; use std::future::poll_fn; use std::net::{IpAddr, SocketAddr}; + use hyper::service::Service; + use routerify::RequestServiceBuilder; + + use super::*; + #[tokio::test] async fn test_request_id_returned() { let builder = RequestServiceBuilder::new(make_router().build().unwrap()).unwrap(); diff --git 
a/libs/http-utils/src/error.rs b/libs/http-utils/src/error.rs index 746305caec..f790dc26ca 100644 --- a/libs/http-utils/src/error.rs +++ b/libs/http-utils/src/error.rs @@ -1,10 +1,10 @@ -use hyper::{header, Body, Response, StatusCode}; -use serde::{Deserialize, Serialize}; use std::borrow::Cow; use std::error::Error as StdError; + +use hyper::{Body, Response, StatusCode, header}; +use serde::{Deserialize, Serialize}; use thiserror::Error; use tracing::{error, info, warn}; - use utils::auth::AuthError; #[derive(Debug, Error)] diff --git a/libs/http-utils/src/failpoints.rs b/libs/http-utils/src/failpoints.rs index 8a1e0c8cf0..984823f4a9 100644 --- a/libs/http-utils/src/failpoints.rs +++ b/libs/http-utils/src/failpoints.rs @@ -1,12 +1,11 @@ -use crate::error::ApiError; -use crate::json::{json_request, json_response}; - use hyper::{Body, Request, Response, StatusCode}; use serde::{Deserialize, Serialize}; use tokio_util::sync::CancellationToken; - use utils::failpoint_support::apply_failpoint; +use crate::error::ApiError; +use crate::json::{json_request, json_response}; + pub type ConfigureFailpointsRequest = Vec; /// Information for configuring a single fail point diff --git a/libs/http-utils/src/json.rs b/libs/http-utils/src/json.rs index e53231f313..14ebac91e6 100644 --- a/libs/http-utils/src/json.rs +++ b/libs/http-utils/src/json.rs @@ -1,6 +1,6 @@ use anyhow::Context; use bytes::Buf; -use hyper::{header, Body, Request, Response, StatusCode}; +use hyper::{Body, Request, Response, StatusCode, header}; use serde::{Deserialize, Serialize}; use super::error::ApiError; diff --git a/libs/http-utils/src/lib.rs b/libs/http-utils/src/lib.rs index ae6a27aaa8..c692a54257 100644 --- a/libs/http-utils/src/lib.rs +++ b/libs/http-utils/src/lib.rs @@ -9,4 +9,4 @@ extern crate hyper0 as hyper; /// Current fast way to apply simple http routing in various Neon binaries. /// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed. 
-pub use routerify::{ext::RequestExt, RouterBuilder, RouterService}; +pub use routerify::{RouterBuilder, RouterService, ext::RequestExt}; diff --git a/libs/http-utils/src/pprof.rs b/libs/http-utils/src/pprof.rs index fe1cc10838..529017f350 100644 --- a/libs/http-utils/src/pprof.rs +++ b/libs/http-utils/src/pprof.rs @@ -1,15 +1,15 @@ -use anyhow::bail; -use flate2::write::{GzDecoder, GzEncoder}; -use flate2::Compression; -use itertools::Itertools as _; -use pprof::protos::{Function, Line, Location, Message as _, Profile}; -use regex::Regex; - use std::borrow::Cow; use std::collections::{HashMap, HashSet}; use std::ffi::c_void; use std::io::Write as _; +use anyhow::bail; +use flate2::Compression; +use flate2::write::{GzDecoder, GzEncoder}; +use itertools::Itertools as _; +use pprof::protos::{Function, Line, Location, Message as _, Profile}; +use regex::Regex; + /// Decodes a gzip-compressed Protobuf-encoded pprof profile. pub fn decode(bytes: &[u8]) -> anyhow::Result { let mut gz = GzDecoder::new(Vec::new()); diff --git a/libs/http-utils/src/request.rs b/libs/http-utils/src/request.rs index 7ea71685ec..9024a90a82 100644 --- a/libs/http-utils/src/request.rs +++ b/libs/http-utils/src/request.rs @@ -1,10 +1,13 @@ use core::fmt; -use std::{borrow::Cow, str::FromStr}; +use std::borrow::Cow; +use std::str::FromStr; + +use anyhow::anyhow; +use hyper::body::HttpBody; +use hyper::{Body, Request}; +use routerify::ext::RequestExt; use super::error::ApiError; -use anyhow::anyhow; -use hyper::{body::HttpBody, Body, Request}; -use routerify::ext::RequestExt; pub fn get_request_param<'a>( request: &'a Request, diff --git a/libs/metrics/src/hll.rs b/libs/metrics/src/hll.rs index 723916a742..93f6a2b7cc 100644 --- a/libs/metrics/src/hll.rs +++ b/libs/metrics/src/hll.rs @@ -6,17 +6,15 @@ //! Probabilistic cardinality estimators, such as the HyperLogLog algorithm, //! use significantly less memory than this, but can only approximate the cardinality. 
-use std::{ - hash::{BuildHasher, BuildHasherDefault, Hash}, - sync::atomic::AtomicU8, -}; +use std::hash::{BuildHasher, BuildHasherDefault, Hash}; +use std::sync::atomic::AtomicU8; -use measured::{ - label::{LabelGroupVisitor, LabelName, LabelValue, LabelVisitor}, - metric::{counter::CounterState, name::MetricNameEncoder, Metric, MetricType, MetricVec}, - text::TextEncoder, - LabelGroup, -}; +use measured::LabelGroup; +use measured::label::{LabelGroupVisitor, LabelName, LabelValue, LabelVisitor}; +use measured::metric::counter::CounterState; +use measured::metric::name::MetricNameEncoder; +use measured::metric::{Metric, MetricType, MetricVec}; +use measured::text::TextEncoder; use twox_hash::xxh3; /// Create an [`HyperLogLogVec`] and registers to default registry. @@ -27,9 +25,7 @@ macro_rules! register_hll_vec { $crate::register(Box::new(hll_vec.clone())).map(|_| hll_vec) }}; - ($N:literal, $NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{ - $crate::register_hll_vec!($N, $crate::opts!($NAME, $HELP), $LABELS_NAMES) - }}; + ($N:literal, $NAME:expr, $HELP:expr, $LABELS_NAMES:expr $(,)?) => {{ $crate::register_hll_vec!($N, $crate::opts!($NAME, $HELP), $LABELS_NAMES) }}; } /// Create an [`HyperLogLog`] and registers to default registry. @@ -40,9 +36,7 @@ macro_rules! register_hll { $crate::register(Box::new(hll.clone())).map(|_| hll) }}; - ($N:literal, $NAME:expr, $HELP:expr $(,)?) => {{ - $crate::register_hll!($N, $crate::opts!($NAME, $HELP)) - }}; + ($N:literal, $NAME:expr, $HELP:expr $(,)?) => {{ $crate::register_hll!($N, $crate::opts!($NAME, $HELP)) }}; } /// HLL is a probabilistic cardinality measure. @@ -195,8 +189,10 @@ impl measured::metric::MetricEncoding); diff --git a/libs/metrics/src/lib.rs b/libs/metrics/src/lib.rs index 0f6c2a0937..4df8d7bc51 100644 --- a/libs/metrics/src/lib.rs +++ b/libs/metrics/src/lib.rs @@ -4,38 +4,26 @@ //! a default registry. 
#![deny(clippy::undocumented_unsafe_blocks)] -use measured::{ - label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels}, - metric::{ - counter::CounterState, - gauge::GaugeState, - group::Encoding, - name::{MetricName, MetricNameEncoder}, - MetricEncoding, MetricFamilyEncoding, - }, - FixedCardinalityLabel, LabelGroup, MetricGroup, -}; +use measured::label::{LabelGroupSet, LabelGroupVisitor, LabelName, NoLabels}; +use measured::metric::counter::CounterState; +use measured::metric::gauge::GaugeState; +use measured::metric::group::Encoding; +use measured::metric::name::{MetricName, MetricNameEncoder}; +use measured::metric::{MetricEncoding, MetricFamilyEncoding}; +use measured::{FixedCardinalityLabel, LabelGroup, MetricGroup}; use once_cell::sync::Lazy; +use prometheus::Registry; use prometheus::core::{ Atomic, AtomicU64, Collector, GenericCounter, GenericCounterVec, GenericGauge, GenericGaugeVec, }; pub use prometheus::local::LocalHistogram; -pub use prometheus::opts; -pub use prometheus::register; -pub use prometheus::Error; -use prometheus::Registry; -pub use prometheus::{core, default_registry, proto}; -pub use prometheus::{exponential_buckets, linear_buckets}; -pub use prometheus::{register_counter_vec, Counter, CounterVec}; -pub use prometheus::{register_gauge, Gauge}; -pub use prometheus::{register_gauge_vec, GaugeVec}; -pub use prometheus::{register_histogram, Histogram}; -pub use prometheus::{register_histogram_vec, HistogramVec}; -pub use prometheus::{register_int_counter, IntCounter}; -pub use prometheus::{register_int_counter_vec, IntCounterVec}; -pub use prometheus::{register_int_gauge, IntGauge}; -pub use prometheus::{register_int_gauge_vec, IntGaugeVec}; -pub use prometheus::{Encoder, TextEncoder}; +pub use prometheus::{ + Counter, CounterVec, Encoder, Error, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, + IntCounterVec, IntGauge, IntGaugeVec, TextEncoder, core, default_registry, exponential_buckets, + linear_buckets, opts, proto, 
register, register_counter_vec, register_gauge, + register_gauge_vec, register_histogram, register_histogram_vec, register_int_counter, + register_int_counter_vec, register_int_gauge, register_int_gauge_vec, +}; pub mod launch_timestamp; mod wrappers; diff --git a/libs/postgres_backend/src/lib.rs b/libs/postgres_backend/src/lib.rs index f74b229ac4..a0a891f0dc 100644 --- a/libs/postgres_backend/src/lib.rs +++ b/libs/postgres_backend/src/lib.rs @@ -4,28 +4,28 @@ //! is rather narrow, but we can extend it once required. #![deny(unsafe_code)] #![deny(clippy::undocumented_unsafe_blocks)] -use anyhow::Context; -use bytes::Bytes; -use serde::{Deserialize, Serialize}; +use std::future::Future; use std::io::ErrorKind; use std::net::SocketAddr; -use std::os::fd::AsRawFd; -use std::os::fd::RawFd; +use std::os::fd::{AsRawFd, RawFd}; use std::pin::Pin; +use std::str::FromStr; use std::sync::Arc; -use std::task::{ready, Poll}; +use std::task::{Poll, ready}; use std::{fmt, io}; -use std::{future::Future, str::FromStr}; -use tokio::io::{AsyncRead, AsyncWrite}; -use tokio_rustls::TlsAcceptor; -use tokio_util::sync::CancellationToken; -use tracing::{debug, error, info, trace, warn}; +use anyhow::Context; +use bytes::Bytes; use pq_proto::framed::{ConnectionError, Framed, FramedReader, FramedWriter}; use pq_proto::{ BeMessage, FeMessage, FeStartupPacket, ProtocolError, SQLSTATE_ADMIN_SHUTDOWN, SQLSTATE_INTERNAL_ERROR, SQLSTATE_SUCCESSFUL_COMPLETION, }; +use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio_rustls::TlsAcceptor; +use tokio_util::sync::CancellationToken; +use tracing::{debug, error, info, trace, warn}; /// An error, occurred during query processing: /// either during the connection ([`ConnectionError`]) or before/after it. 
@@ -746,7 +746,7 @@ impl PostgresBackend { match e { QueryError::Shutdown => return Ok(ProcessMsgResult::Break), QueryError::SimulatedConnectionError => { - return Err(QueryError::SimulatedConnectionError) + return Err(QueryError::SimulatedConnectionError); } err @ QueryError::Reconnect => { // Instruct the client to reconnect, stop processing messages @@ -1020,7 +1020,9 @@ fn log_query_error(query: &str, e: &QueryError) { } } QueryError::Disconnected(other_connection_error) => { - error!("query handler for '{query}' failed with connection error: {other_connection_error:?}") + error!( + "query handler for '{query}' failed with connection error: {other_connection_error:?}" + ) } QueryError::SimulatedConnectionError => { error!("query handler for query '{query}' failed due to a simulated connection error") diff --git a/libs/postgres_backend/tests/simple_select.rs b/libs/postgres_backend/tests/simple_select.rs index 3fcfbf4a03..907ef9eed3 100644 --- a/libs/postgres_backend/tests/simple_select.rs +++ b/libs/postgres_backend/tests/simple_select.rs @@ -1,10 +1,11 @@ +use std::io::Cursor; +use std::sync::Arc; + /// Test postgres_backend_async with tokio_postgres use once_cell::sync::Lazy; use postgres_backend::{AuthType, Handler, PostgresBackend, QueryError}; use pq_proto::{BeMessage, RowDescriptor}; use rustls::crypto::ring; -use std::io::Cursor; -use std::sync::Arc; use tokio::io::{AsyncRead, AsyncWrite}; use tokio::net::{TcpListener, TcpStream}; use tokio_postgres::config::SslMode; diff --git a/libs/postgres_connection/src/lib.rs b/libs/postgres_connection/src/lib.rs index e3d31c6cfc..cd981b3729 100644 --- a/libs/postgres_connection/src/lib.rs +++ b/libs/postgres_connection/src/lib.rs @@ -1,9 +1,10 @@ #![deny(unsafe_code)] #![deny(clippy::undocumented_unsafe_blocks)] -use anyhow::{bail, Context}; -use itertools::Itertools; use std::borrow::Cow; use std::fmt; + +use anyhow::{Context, bail}; +use itertools::Itertools; use url::Host; /// Parses a string of format either 
`host:port` or `host` into a corresponding pair. @@ -29,9 +30,10 @@ pub fn parse_host_port>(host_port: S) -> Result<(Host, Option #[cfg(test)] mod tests_parse_host_port { - use crate::parse_host_port; use url::Host; + use crate::parse_host_port; + #[test] fn test_normal() { let (host, port) = parse_host_port("hello:123").unwrap(); @@ -207,10 +209,11 @@ impl fmt::Debug for PgConnectionConfig { #[cfg(test)] mod tests_pg_connection_config { - use crate::PgConnectionConfig; use once_cell::sync::Lazy; use url::Host; + use crate::PgConnectionConfig; + static STUB_HOST: Lazy = Lazy::new(|| Host::Domain("stub.host.example".to_owned())); #[test] diff --git a/libs/postgres_ffi/benches/waldecoder.rs b/libs/postgres_ffi/benches/waldecoder.rs index c8cf0d322a..2e1d62e452 100644 --- a/libs/postgres_ffi/benches/waldecoder.rs +++ b/libs/postgres_ffi/benches/waldecoder.rs @@ -1,6 +1,6 @@ use std::ffi::CStr; -use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use criterion::{Bencher, Criterion, criterion_group, criterion_main}; use postgres_ffi::v17::wal_generator::LogicalMessageGenerator; use postgres_ffi::v17::waldecoder_handler::WalStreamDecoderHandler; use postgres_ffi::waldecoder::WalStreamDecoder; diff --git a/libs/postgres_ffi/build.rs b/libs/postgres_ffi/build.rs index d3a85f2683..cdebd43f6f 100644 --- a/libs/postgres_ffi/build.rs +++ b/libs/postgres_ffi/build.rs @@ -4,7 +4,7 @@ use std::env; use std::path::PathBuf; use std::process::Command; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; use bindgen::callbacks::{DeriveInfo, ParseCallbacks}; #[derive(Debug)] diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 301bc2f16e..8dfd8d8750 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -21,7 +21,9 @@ macro_rules! 
postgres_ffi { pub mod bindings { // bindgen generates bindings for a lot of stuff we don't need #![allow(dead_code)] + #![allow(unsafe_op_in_unsafe_fn)] #![allow(clippy::undocumented_unsafe_blocks)] + #![allow(clippy::ptr_offset_with_cast)] use serde::{Deserialize, Serialize}; include!(concat!( @@ -43,8 +45,7 @@ macro_rules! postgres_ffi { pub const PG_MAJORVERSION: &str = stringify!($version); // Re-export some symbols from bindings - pub use bindings::DBState_DB_SHUTDOWNED; - pub use bindings::{CheckPoint, ControlFileData, XLogRecord}; + pub use bindings::{CheckPoint, ControlFileData, DBState_DB_SHUTDOWNED, XLogRecord}; pub const ZERO_CHECKPOINT: bytes::Bytes = bytes::Bytes::from_static(&[0u8; xlog_utils::SIZEOF_CHECKPOINT]); @@ -221,21 +222,17 @@ pub mod relfile_utils; pub mod walrecord; // Export some widely used datatypes that are unlikely to change across Postgres versions -pub use v14::bindings::RepOriginId; -pub use v14::bindings::{uint32, uint64, Oid}; -pub use v14::bindings::{BlockNumber, OffsetNumber}; -pub use v14::bindings::{MultiXactId, TransactionId}; -pub use v14::bindings::{TimeLineID, TimestampTz, XLogRecPtr, XLogSegNo}; - +pub use v14::bindings::{ + BlockNumber, CheckPoint, ControlFileData, MultiXactId, OffsetNumber, Oid, PageHeaderData, + RepOriginId, TimeLineID, TimestampTz, TransactionId, XLogRecPtr, XLogRecord, XLogSegNo, uint32, + uint64, +}; // Likewise for these, although the assumption that these don't change is a little more iffy. pub use v14::bindings::{MultiXactOffset, MultiXactStatus}; -pub use v14::bindings::{PageHeaderData, XLogRecord}; pub use v14::xlog_utils::{ XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD, }; -pub use v14::bindings::{CheckPoint, ControlFileData}; - // from pg_config.h. These can be changed with configure options --with-blocksize=BLOCKSIZE and // --with-segsize=SEGSIZE, but assume the defaults for now. 
pub const BLCKSZ: u16 = 8192; @@ -246,13 +243,11 @@ pub const WAL_SEGMENT_SIZE: usize = 16 * 1024 * 1024; pub const MAX_SEND_SIZE: usize = XLOG_BLCKSZ * 16; // Export some version independent functions that are used outside of this mod -pub use v14::xlog_utils::encode_logical_message; -pub use v14::xlog_utils::get_current_timestamp; -pub use v14::xlog_utils::to_pg_timestamp; -pub use v14::xlog_utils::try_from_pg_timestamp; -pub use v14::xlog_utils::XLogFileName; - pub use v14::bindings::DBState_DB_SHUTDOWNED; +pub use v14::xlog_utils::{ + XLogFileName, encode_logical_message, get_current_timestamp, to_pg_timestamp, + try_from_pg_timestamp, +}; pub fn bkpimage_is_compressed(bimg_info: u8, version: u32) -> bool { dispatch_pgversion!(version, pgv::bindings::bkpimg_is_compressed(bimg_info)) @@ -355,8 +350,9 @@ pub fn fsm_logical_to_physical(addr: BlockNumber) -> BlockNumber { } pub mod waldecoder { - use bytes::{Buf, Bytes, BytesMut}; use std::num::NonZeroU32; + + use bytes::{Buf, Bytes, BytesMut}; use thiserror::Error; use utils::lsn::Lsn; diff --git a/libs/postgres_ffi/src/pg_constants.rs b/libs/postgres_ffi/src/pg_constants.rs index e343473d77..b0bdd8a8da 100644 --- a/libs/postgres_ffi/src/pg_constants.rs +++ b/libs/postgres_ffi/src/pg_constants.rs @@ -9,8 +9,7 @@ //! comments on them. //! -use crate::PageHeaderData; -use crate::BLCKSZ; +use crate::{BLCKSZ, PageHeaderData}; // // From pg_tablespace_d.h diff --git a/libs/postgres_ffi/src/walrecord.rs b/libs/postgres_ffi/src/walrecord.rs index fce37e2fdd..1ccf4590a9 100644 --- a/libs/postgres_ffi/src/walrecord.rs +++ b/libs/postgres_ffi/src/walrecord.rs @@ -3,18 +3,16 @@ //! //! 
TODO: Generate separate types for each supported PG version -use crate::pg_constants; -use crate::XLogRecord; -use crate::{ - BlockNumber, MultiXactId, MultiXactOffset, MultiXactStatus, Oid, RepOriginId, TimestampTz, - TransactionId, -}; -use crate::{BLCKSZ, XLOG_SIZE_OF_XLOG_RECORD}; use bytes::{Buf, Bytes}; use serde::{Deserialize, Serialize}; use utils::bin_ser::DeserializeError; use utils::lsn::Lsn; +use crate::{ + BLCKSZ, BlockNumber, MultiXactId, MultiXactOffset, MultiXactStatus, Oid, RepOriginId, + TimestampTz, TransactionId, XLOG_SIZE_OF_XLOG_RECORD, XLogRecord, pg_constants, +}; + #[repr(C)] #[derive(Clone, Debug, Serialize, Deserialize)] pub struct XlMultiXactCreate { @@ -508,9 +506,10 @@ pub fn decode_wal_record( } pub mod v14 { - use crate::{OffsetNumber, TransactionId}; use bytes::{Buf, Bytes}; + use crate::{OffsetNumber, TransactionId}; + #[repr(C)] #[derive(Debug)] pub struct XlHeapInsert { @@ -678,9 +677,10 @@ pub mod v15 { } pub mod v16 { + use bytes::{Buf, Bytes}; + pub use super::v14::{XlHeapInsert, XlHeapLockUpdated, XlHeapMultiInsert, XlParameterChange}; use crate::{OffsetNumber, TransactionId}; - use bytes::{Buf, Bytes}; pub struct XlHeapDelete { pub xmax: TransactionId, @@ -746,9 +746,10 @@ pub mod v16 { /* Since PG16, we have the Neon RMGR (RM_NEON_ID) to manage Neon-flavored WAL. 
*/ pub mod rm_neon { - use crate::{OffsetNumber, TransactionId}; use bytes::{Buf, Bytes}; + use crate::{OffsetNumber, TransactionId}; + #[repr(C)] #[derive(Debug)] pub struct XlNeonHeapInsert { @@ -858,14 +859,14 @@ pub mod v16 { } pub mod v17 { - pub use super::v14::XlHeapLockUpdated; - pub use crate::{TimeLineID, TimestampTz}; use bytes::{Buf, Bytes}; - pub use super::v16::rm_neon; + pub use super::v14::XlHeapLockUpdated; pub use super::v16::{ XlHeapDelete, XlHeapInsert, XlHeapLock, XlHeapMultiInsert, XlHeapUpdate, XlParameterChange, + rm_neon, }; + pub use crate::{TimeLineID, TimestampTz}; #[repr(C)] #[derive(Debug)] diff --git a/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs b/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs index 41afcea6c2..6151ce34ac 100644 --- a/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs +++ b/libs/postgres_ffi/wal_craft/src/bin/wal_craft.rs @@ -1,7 +1,9 @@ +use std::path::PathBuf; +use std::str::FromStr; + use anyhow::*; -use clap::{value_parser, Arg, ArgMatches, Command}; +use clap::{Arg, ArgMatches, Command, value_parser}; use postgres::Client; -use std::{path::PathBuf, str::FromStr}; use wal_craft::*; fn main() -> Result<()> { diff --git a/libs/postgres_ffi/wal_craft/src/lib.rs b/libs/postgres_ffi/wal_craft/src/lib.rs index 77dff4ac99..ca9530faef 100644 --- a/libs/postgres_ffi/wal_craft/src/lib.rs +++ b/libs/postgres_ffi/wal_craft/src/lib.rs @@ -1,17 +1,18 @@ -use anyhow::{bail, ensure}; -use camino_tempfile::{tempdir, Utf8TempDir}; -use log::*; -use postgres::types::PgLsn; -use postgres::Client; -use postgres_ffi::{WAL_SEGMENT_SIZE, XLOG_BLCKSZ}; -use postgres_ffi::{ - XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, XLOG_SIZE_OF_XLOG_SHORT_PHD, -}; use std::ffi::OsStr; use std::path::{Path, PathBuf}; use std::process::Command; use std::time::{Duration, Instant}; +use anyhow::{bail, ensure}; +use camino_tempfile::{Utf8TempDir, tempdir}; +use log::*; +use postgres::Client; +use postgres::types::PgLsn; +use postgres_ffi::{ + 
WAL_SEGMENT_SIZE, XLOG_BLCKSZ, XLOG_SIZE_OF_XLOG_LONG_PHD, XLOG_SIZE_OF_XLOG_RECORD, + XLOG_SIZE_OF_XLOG_SHORT_PHD, +}; + macro_rules! xlog_utils_test { ($version:ident) => { #[path = "."] diff --git a/libs/pq_proto/src/framed.rs b/libs/pq_proto/src/framed.rs index ccbb90e384..8e216d0f44 100644 --- a/libs/pq_proto/src/framed.rs +++ b/libs/pq_proto/src/framed.rs @@ -10,11 +10,10 @@ //! calls. //! //! [Box]: https://docs.rs/futures-util/0.3.26/src/futures_util/lock/bilock.rs.html#107 +use std::future::Future; +use std::io::{self, ErrorKind}; + use bytes::{Buf, BytesMut}; -use std::{ - future::Future, - io::{self, ErrorKind}, -}; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, ReadHalf, WriteHalf}; use crate::{BeMessage, FeMessage, FeStartupPacket, ProtocolError}; diff --git a/libs/pq_proto/src/lib.rs b/libs/pq_proto/src/lib.rs index f99128b76a..e435ffbf7e 100644 --- a/libs/pq_proto/src/lib.rs +++ b/libs/pq_proto/src/lib.rs @@ -5,14 +5,15 @@ pub mod framed; +use std::borrow::Cow; +use std::{fmt, io, str}; + use byteorder::{BigEndian, ReadBytesExt}; use bytes::{Buf, BufMut, Bytes, BytesMut}; use itertools::Itertools; -use serde::{Deserialize, Serialize}; -use std::{borrow::Cow, fmt, io, str}; - // re-export for use in utils pageserver_feedback.rs pub use postgres_protocol::PG_EPOCH; +use serde::{Deserialize, Serialize}; pub type Oid = u32; pub type SystemId = u64; @@ -206,8 +207,8 @@ use rand::distributions::{Distribution, Standard}; impl Distribution for Standard { fn sample(&self, rng: &mut R) -> CancelKeyData { CancelKeyData { - backend_pid: rng.gen(), - cancel_key: rng.gen(), + backend_pid: rng.r#gen(), + cancel_key: rng.r#gen(), } } } @@ -1035,7 +1036,7 @@ impl BeMessage<'_> { buf.put_u8(b'd'); write_body(buf, |buf| { buf.put_u8(b'0'); // matches INTERPRETED_WAL_RECORD_TAG in postgres-protocol - // dependency + // dependency buf.put_u64(rec.streaming_lsn); buf.put_u64(rec.commit_lsn); buf.put_slice(rec.data); diff --git 
a/libs/tenant_size_model/src/calculation.rs b/libs/tenant_size_model/src/calculation.rs index be00562219..d54876ba2c 100644 --- a/libs/tenant_size_model/src/calculation.rs +++ b/libs/tenant_size_model/src/calculation.rs @@ -130,11 +130,7 @@ impl StorageModel { break; } } - if possible { - Some(snapshot_later) - } else { - None - } + if possible { Some(snapshot_later) } else { None } } else { None }; diff --git a/libs/tenant_size_model/src/svg.rs b/libs/tenant_size_model/src/svg.rs index 25ebb1c3d8..a3bc937f52 100644 --- a/libs/tenant_size_model/src/svg.rs +++ b/libs/tenant_size_model/src/svg.rs @@ -76,7 +76,10 @@ pub fn draw_svg( let mut result = String::new(); - writeln!(result, "")?; + writeln!( + result, + "" + )?; draw.calculate_svg_layout(); diff --git a/libs/tracing-utils/src/http.rs b/libs/tracing-utils/src/http.rs index 2168beee88..8560d0718c 100644 --- a/libs/tracing-utils/src/http.rs +++ b/libs/tracing-utils/src/http.rs @@ -1,8 +1,8 @@ //! Tracing wrapper for Hyper HTTP server -use hyper0::HeaderMap; -use hyper0::{Body, Request, Response}; use std::future::Future; + +use hyper0::{Body, HeaderMap, Request, Response}; use tracing::Instrument; use tracing_opentelemetry::OpenTelemetrySpanExt; diff --git a/libs/tracing-utils/src/lib.rs b/libs/tracing-utils/src/lib.rs index 818d759eac..72f94d61e4 100644 --- a/libs/tracing-utils/src/lib.rs +++ b/libs/tracing-utils/src/lib.rs @@ -36,11 +36,11 @@ pub mod http; -use opentelemetry::trace::TracerProvider; use opentelemetry::KeyValue; +use opentelemetry::trace::TracerProvider; use tracing::Subscriber; -use tracing_subscriber::registry::LookupSpan; use tracing_subscriber::Layer; +use tracing_subscriber::registry::LookupSpan; /// Set up OpenTelemetry exporter, using configuration from environment variables. 
/// diff --git a/libs/utils/benches/benchmarks.rs b/libs/utils/benches/benchmarks.rs index 348e27ac47..12c620ec87 100644 --- a/libs/utils/benches/benchmarks.rs +++ b/libs/utils/benches/benchmarks.rs @@ -1,6 +1,6 @@ use std::time::Duration; -use criterion::{criterion_group, criterion_main, Bencher, Criterion}; +use criterion::{Bencher, Criterion, criterion_group, criterion_main}; use pprof::criterion::{Output, PProfProfiler}; use utils::id; use utils::logging::log_slow; diff --git a/libs/utils/src/auth.rs b/libs/utils/src/auth.rs index 4bfd0ab055..cc5b0b1d13 100644 --- a/libs/utils/src/auth.rs +++ b/libs/utils/src/auth.rs @@ -1,12 +1,15 @@ // For details about authentication see docs/authentication.md -use arc_swap::ArcSwap; -use std::{borrow::Cow, fmt::Display, fs, sync::Arc}; +use std::borrow::Cow; +use std::fmt::Display; +use std::fs; +use std::sync::Arc; use anyhow::Result; +use arc_swap::ArcSwap; use camino::Utf8Path; use jsonwebtoken::{ - decode, encode, Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, + Algorithm, DecodingKey, EncodingKey, Header, TokenData, Validation, decode, encode, }; use serde::{Deserialize, Serialize}; @@ -129,7 +132,9 @@ impl JwtAuth { anyhow::bail!("path is neither a directory or a file") }; if decoding_keys.is_empty() { - anyhow::bail!("Configured for JWT auth with zero decoding keys. All JWT gated requests would be rejected."); + anyhow::bail!( + "Configured for JWT auth with zero decoding keys. All JWT gated requests would be rejected." 
+ ); } Ok(Self::new(decoding_keys)) } @@ -175,9 +180,10 @@ pub fn encode_from_key_file(claims: &Claims, key_data: &[u8]) -> Result #[cfg(test)] mod tests { - use super::*; use std::str::FromStr; + use super::*; + // Generated with: // // openssl genpkey -algorithm ed25519 -out ed25519-priv.pem @@ -215,7 +221,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH let encoded_eddsa = "eyJhbGciOiJFZERTQSIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6InRlbmFudCIsInRlbmFudF9pZCI6IjNkMWY3NTk1YjQ2ODIzMDMwNGUwYjczY2VjYmNiMDgxIiwiaXNzIjoibmVvbi5jb250cm9scGxhbmUiLCJpYXQiOjE2Nzg0NDI0Nzl9.rNheBnluMJNgXzSTTJoTNIGy4P_qe0JUHl_nVEGuDCTgHOThPVr552EnmKccrCKquPeW3c2YUk0Y9Oh4KyASAw"; // Check it can be validated with the public key - let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]); + let auth = JwtAuth::new(vec![ + DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(), + ]); let claims_from_token = auth.decode(encoded_eddsa).unwrap().claims; assert_eq!(claims_from_token, expected_claims); } @@ -230,7 +238,9 @@ MC4CAQAwBQYDK2VwBCIEID/Drmc1AA6U/znNRWpF3zEGegOATQxfkdWxitcOMsIH let encoded = encode_from_key_file(&claims, TEST_PRIV_KEY_ED25519).unwrap(); // decode it back - let auth = JwtAuth::new(vec![DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap()]); + let auth = JwtAuth::new(vec![ + DecodingKey::from_ed_pem(TEST_PUB_KEY_ED25519).unwrap(), + ]); let decoded = auth.decode(&encoded).unwrap(); assert_eq!(decoded.claims, claims); diff --git a/libs/utils/src/backoff.rs b/libs/utils/src/backoff.rs index e6503fe377..4a4c4eedbb 100644 --- a/libs/utils/src/backoff.rs +++ b/libs/utils/src/backoff.rs @@ -121,10 +121,12 @@ where #[cfg(test)] mod tests { - use super::*; use std::io; + use tokio::sync::Mutex; + use super::*; + #[test] fn backoff_defaults_produce_growing_backoff_sequence() { let mut current_backoff_value = None; diff --git a/libs/utils/src/bin_ser.rs b/libs/utils/src/bin_ser.rs index 4d173d0726..2861baeee5 100644 --- 
a/libs/utils/src/bin_ser.rs +++ b/libs/utils/src/bin_ser.rs @@ -13,9 +13,11 @@ #![warn(missing_docs)] -use bincode::Options; -use serde::{de::DeserializeOwned, Serialize}; use std::io::{self, Read, Write}; + +use bincode::Options; +use serde::Serialize; +use serde::de::DeserializeOwned; use thiserror::Error; /// An error that occurred during a deserialize operation @@ -261,10 +263,12 @@ impl LeSer for T {} #[cfg(test)] mod tests { - use super::DeserializeError; - use serde::{Deserialize, Serialize}; use std::io::Cursor; + use serde::{Deserialize, Serialize}; + + use super::DeserializeError; + #[derive(Debug, PartialEq, Eq, Serialize, Deserialize)] pub struct ShortStruct { a: u8, diff --git a/libs/utils/src/circuit_breaker.rs b/libs/utils/src/circuit_breaker.rs index e1ddfd8650..46a6584d66 100644 --- a/libs/utils/src/circuit_breaker.rs +++ b/libs/utils/src/circuit_breaker.rs @@ -1,7 +1,5 @@ -use std::{ - fmt::Display, - time::{Duration, Instant}, -}; +use std::fmt::Display; +use std::time::{Duration, Instant}; use metrics::IntCounter; diff --git a/libs/utils/src/completion.rs b/libs/utils/src/completion.rs index f65c080ad4..973d754715 100644 --- a/libs/utils/src/completion.rs +++ b/libs/utils/src/completion.rs @@ -1,4 +1,5 @@ -use tokio_util::task::{task_tracker::TaskTrackerToken, TaskTracker}; +use tokio_util::task::TaskTracker; +use tokio_util::task::task_tracker::TaskTrackerToken; /// While a reference is kept around, the associated [`Barrier::wait`] will wait. 
/// diff --git a/libs/utils/src/crashsafe.rs b/libs/utils/src/crashsafe.rs index 5241ab183c..290a5b2686 100644 --- a/libs/utils/src/crashsafe.rs +++ b/libs/utils/src/crashsafe.rs @@ -1,9 +1,7 @@ +use std::borrow::Cow; +use std::fs::{self, File}; +use std::io::{self, Write}; use std::os::fd::AsRawFd; -use std::{ - borrow::Cow, - fs::{self, File}, - io::{self, Write}, -}; use camino::{Utf8Path, Utf8PathBuf}; diff --git a/libs/utils/src/env.rs b/libs/utils/src/env.rs index a1bcec9229..2a85f54a01 100644 --- a/libs/utils/src/env.rs +++ b/libs/utils/src/env.rs @@ -1,6 +1,7 @@ //! Wrapper around `std::env::var` for parsing environment variables. -use std::{fmt::Display, str::FromStr}; +use std::fmt::Display; +use std::str::FromStr; /// For types `V` that implement [`FromStr`]. pub fn var(varname: &str) -> Option diff --git a/libs/utils/src/failpoint_support.rs b/libs/utils/src/failpoint_support.rs index fc998ad9a9..ce014eb0ac 100644 --- a/libs/utils/src/failpoint_support.rs +++ b/libs/utils/src/failpoint_support.rs @@ -127,6 +127,9 @@ pub async fn failpoint_sleep_cancellable_helper( tracing::info!("failpoint {:?}: sleep done", name); } +/// Initialize the configured failpoints +/// +/// You must call this function before any concurrent threads do operations. pub fn init() -> fail::FailScenario<'static> { // The failpoints lib provides support for parsing the `FAILPOINTS` env var. // We want non-default behavior for `exit`, though, so, we handle it separately. @@ -134,7 +137,10 @@ pub fn init() -> fail::FailScenario<'static> { // Format for FAILPOINTS is "name=actions" separated by ";". 
let actions = std::env::var("FAILPOINTS"); if actions.is_ok() { - std::env::remove_var("FAILPOINTS"); + // SAFETY: this function should before any threads start and access env vars concurrently + unsafe { + std::env::remove_var("FAILPOINTS"); + } } else { // let the library handle non-utf8, or nothing for not present } diff --git a/libs/utils/src/fs_ext.rs b/libs/utils/src/fs_ext.rs index 8e53d2c79b..a406ab0378 100644 --- a/libs/utils/src/fs_ext.rs +++ b/libs/utils/src/fs_ext.rs @@ -58,9 +58,8 @@ where #[cfg(test)] mod test { - use crate::fs_ext::{is_directory_empty, list_dir}; - use super::ignore_absent_files; + use crate::fs_ext::{is_directory_empty, list_dir}; #[test] fn is_empty_dir() { diff --git a/libs/utils/src/fs_ext/rename_noreplace.rs b/libs/utils/src/fs_ext/rename_noreplace.rs index 897e30d7f1..fc6f794b57 100644 --- a/libs/utils/src/fs_ext/rename_noreplace.rs +++ b/libs/utils/src/fs_ext/rename_noreplace.rs @@ -38,7 +38,8 @@ pub fn rename_noreplace( #[cfg(test)] mod test { - use std::{fs, path::PathBuf}; + use std::fs; + use std::path::PathBuf; use super::*; diff --git a/libs/utils/src/generation.rs b/libs/utils/src/generation.rs index 44565ee6a2..b5e4a4644a 100644 --- a/libs/utils/src/generation.rs +++ b/libs/utils/src/generation.rs @@ -169,9 +169,9 @@ mod test { ]; let mut s = String::new(); - for (line, gen, expected) in examples { + for (line, gen_, expected) in examples { s.clear(); - write!(s, "{}", &gen.get_suffix()).expect("string grows"); + write!(s, "{}", &gen_.get_suffix()).expect("string grows"); assert_eq!(s, expected, "example on {line}"); } } diff --git a/libs/utils/src/guard_arc_swap.rs b/libs/utils/src/guard_arc_swap.rs index cec5202460..26cd640d3b 100644 --- a/libs/utils/src/guard_arc_swap.rs +++ b/libs/utils/src/guard_arc_swap.rs @@ -1,8 +1,9 @@ //! A wrapper around `ArcSwap` that ensures there is only one writer at a time and writes //! don't block reads. 
-use arc_swap::ArcSwap; use std::sync::Arc; + +use arc_swap::ArcSwap; use tokio::sync::TryLockError; pub struct GuardArcSwap { diff --git a/libs/utils/src/id.rs b/libs/utils/src/id.rs index eb91839504..6016c23a01 100644 --- a/libs/utils/src/id.rs +++ b/libs/utils/src/id.rs @@ -1,5 +1,6 @@ +use std::fmt; use std::num::ParseIntError; -use std::{fmt, str::FromStr}; +use std::str::FromStr; use anyhow::Context; use hex::FromHex; @@ -215,7 +216,7 @@ macro_rules! id_newtype { impl AsRef<[u8]> for $t { fn as_ref(&self) -> &[u8] { - &self.0 .0 + &self.0.0 } } @@ -367,9 +368,8 @@ impl FromStr for NodeId { mod tests { use serde_assert::{Deserializer, Serializer, Token, Tokens}; - use crate::bin_ser::BeSer; - use super::*; + use crate::bin_ser::BeSer; #[test] fn test_id_serde_non_human_readable() { diff --git a/libs/utils/src/leaky_bucket.rs b/libs/utils/src/leaky_bucket.rs index 0cc58738c0..2398f92766 100644 --- a/libs/utils/src/leaky_bucket.rs +++ b/libs/utils/src/leaky_bucket.rs @@ -21,15 +21,12 @@ //! //! Another explaination can be found here: -use std::{ - sync::{ - atomic::{AtomicU64, Ordering}, - Mutex, - }, - time::Duration, -}; +use std::sync::Mutex; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::Duration; -use tokio::{sync::Notify, time::Instant}; +use tokio::sync::Notify; +use tokio::time::Instant; pub struct LeakyBucketConfig { /// This is the "time cost" of a single request unit. diff --git a/libs/utils/src/linux_socket_ioctl.rs b/libs/utils/src/linux_socket_ioctl.rs index 5ae0e86af8..766529838c 100644 --- a/libs/utils/src/linux_socket_ioctl.rs +++ b/libs/utils/src/linux_socket_ioctl.rs @@ -2,21 +2,23 @@ //! //! 
-use std::{ - io, - mem::MaybeUninit, - os::{fd::RawFd, raw::c_int}, -}; +use std::io; +use std::mem::MaybeUninit; +use std::os::fd::RawFd; +use std::os::raw::c_int; use nix::libc::{FIONREAD, TIOCOUTQ}; unsafe fn do_ioctl(socket_fd: RawFd, cmd: nix::libc::Ioctl) -> io::Result { let mut inq: MaybeUninit = MaybeUninit::uninit(); - let err = nix::libc::ioctl(socket_fd, cmd, inq.as_mut_ptr()); - if err == 0 { - Ok(inq.assume_init()) - } else { - Err(io::Error::last_os_error()) + // SAFETY: encapsulating fn is unsafe, we require `socket_fd` to be a valid file descriptor + unsafe { + let err = nix::libc::ioctl(socket_fd, cmd, inq.as_mut_ptr()); + if err == 0 { + Ok(inq.assume_init()) + } else { + Err(io::Error::last_os_error()) + } } } @@ -24,12 +26,14 @@ unsafe fn do_ioctl(socket_fd: RawFd, cmd: nix::libc::Ioctl) -> io::Result /// /// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor. pub unsafe fn inq(socket_fd: RawFd) -> io::Result { - do_ioctl(socket_fd, FIONREAD) + // SAFETY: encapsulating fn is unsafe + unsafe { do_ioctl(socket_fd, FIONREAD) } } /// # Safety /// /// Caller must ensure that `socket_fd` is a valid TCP socket file descriptor. pub unsafe fn outq(socket_fd: RawFd) -> io::Result { - do_ioctl(socket_fd, TIOCOUTQ) + // SAFETY: encapsulating fn is unsafe + unsafe { do_ioctl(socket_fd, TIOCOUTQ) } } diff --git a/libs/utils/src/lock_file.rs b/libs/utils/src/lock_file.rs index 3a2ed3e830..6aeeeca021 100644 --- a/libs/utils/src/lock_file.rs +++ b/libs/utils/src/lock_file.rs @@ -6,16 +6,15 @@ //! there for potential pitfalls with lock files that are used //! to store PIDs (pidfiles). 
-use std::{ - fs, - io::{Read, Write}, - ops::Deref, - os::unix::prelude::AsRawFd, -}; +use std::fs; +use std::io::{Read, Write}; +use std::ops::Deref; +use std::os::unix::prelude::AsRawFd; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; -use nix::{errno::Errno::EAGAIN, fcntl}; +use nix::errno::Errno::EAGAIN; +use nix::fcntl; use crate::crashsafe; diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 2c36942f43..881f1e765d 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -273,7 +273,9 @@ fn log_panic_to_stderr( location: Option>, backtrace: &std::backtrace::Backtrace, ) { - eprintln!("panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}"); + eprintln!( + "panic while tracing is unconfigured: thread '{thread}' panicked at '{msg}', {location:?}\nStack backtrace:\n{backtrace}" + ); } struct PrettyLocation<'a, 'b>(&'a std::panic::Location<'b>); @@ -361,7 +363,8 @@ pub async fn log_slow(name: &str, threshold: Duration, f: impl Future= num { - Ok(()) - } else { - Err(cnt) - } + if cnt >= num { Ok(()) } else { Err(cnt) } } /// Register and return a channel that will be notified when a number arrives, @@ -325,9 +322,10 @@ where #[cfg(test)] mod tests { - use super::*; use std::sync::Arc; + use super::*; + impl MonotonicCounter for i32 { fn cnt_advance(&mut self, val: i32) { assert!(*self <= val); diff --git a/libs/utils/src/serde_percent.rs b/libs/utils/src/serde_percent.rs index 36e874a161..ca1e7aa25b 100644 --- a/libs/utils/src/serde_percent.rs +++ b/libs/utils/src/serde_percent.rs @@ -12,11 +12,7 @@ pub struct Percent(#[serde(deserialize_with = "deserialize_pct_0_to_100")] u8); impl Percent { pub const fn new(pct: u8) -> Option { - if pct <= 100 { - Some(Percent(pct)) - } else { - None - } + if pct <= 100 { Some(Percent(pct)) } else { None } } pub fn get(&self) -> u8 { diff --git a/libs/utils/src/shard.rs b/libs/utils/src/shard.rs index 
d98284f969..c8c410a725 100644 --- a/libs/utils/src/shard.rs +++ b/libs/utils/src/shard.rs @@ -1,6 +1,7 @@ //! See `pageserver_api::shard` for description on sharding. -use std::{ops::RangeInclusive, str::FromStr}; +use std::ops::RangeInclusive; +use std::str::FromStr; use hex::FromHex; use serde::{Deserialize, Serialize}; @@ -59,11 +60,7 @@ impl ShardCount { /// This method returns the actual number of shards, i.e. if our internal value is /// zero, we return 1 (unsharded tenants have 1 shard). pub fn count(&self) -> u8 { - if self.0 > 0 { - self.0 - } else { - 1 - } + if self.0 > 0 { self.0 } else { 1 } } /// The literal internal value: this is **not** the number of shards in the diff --git a/libs/utils/src/signals.rs b/libs/utils/src/signals.rs index c37e9aea58..f2be1957c4 100644 --- a/libs/utils/src/signals.rs +++ b/libs/utils/src/signals.rs @@ -1,7 +1,7 @@ +pub use signal_hook::consts::TERM_SIGNALS; +pub use signal_hook::consts::signal::*; use signal_hook::iterator::Signals; -pub use signal_hook::consts::{signal::*, TERM_SIGNALS}; - pub enum Signal { Quit, Interrupt, diff --git a/libs/utils/src/simple_rcu.rs b/libs/utils/src/simple_rcu.rs index 6700f86e4a..fabdf9df46 100644 --- a/libs/utils/src/simple_rcu.rs +++ b/libs/utils/src/simple_rcu.rs @@ -44,8 +44,7 @@ #![warn(missing_docs)] use std::ops::Deref; -use std::sync::{Arc, Weak}; -use std::sync::{RwLock, RwLockWriteGuard}; +use std::sync::{Arc, RwLock, RwLockWriteGuard, Weak}; use tokio::sync::watch; @@ -219,10 +218,11 @@ impl RcuWaitList { #[cfg(test)] mod tests { - use super::*; use std::sync::Mutex; use std::time::Duration; + use super::*; + #[tokio::test] async fn two_writers() { let rcu = Rcu::new(1); diff --git a/libs/utils/src/sync/gate.rs b/libs/utils/src/sync/gate.rs index 0a1ed81621..93460785bf 100644 --- a/libs/utils/src/sync/gate.rs +++ b/libs/utils/src/sync/gate.rs @@ -1,10 +1,6 @@ -use std::{ - sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }, - time::Duration, -}; +use std::sync::Arc; +use 
std::sync::atomic::{AtomicBool, Ordering}; +use std::time::Duration; /// Gates are a concurrency helper, primarily used for implementing safe shutdown. /// diff --git a/libs/utils/src/sync/heavier_once_cell.rs b/libs/utils/src/sync/heavier_once_cell.rs index 66c2065554..8f8401b35d 100644 --- a/libs/utils/src/sync/heavier_once_cell.rs +++ b/libs/utils/src/sync/heavier_once_cell.rs @@ -1,7 +1,6 @@ -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, Mutex, MutexGuard, -}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex, MutexGuard}; + use tokio::sync::Semaphore; /// Custom design like [`tokio::sync::OnceCell`] but using [`OwnedSemaphorePermit`] instead of @@ -301,14 +300,13 @@ impl Drop for InitPermit { #[cfg(test)] mod tests { + use std::convert::Infallible; + use std::pin::{Pin, pin}; + use std::time::Duration; + use futures::Future; use super::*; - use std::{ - convert::Infallible, - pin::{pin, Pin}, - time::Duration, - }; #[tokio::test] async fn many_initializers() { diff --git a/libs/utils/src/sync/spsc_fold.rs b/libs/utils/src/sync/spsc_fold.rs index 0cab291d51..7dfbf40411 100644 --- a/libs/utils/src/sync/spsc_fold.rs +++ b/libs/utils/src/sync/spsc_fold.rs @@ -1,4 +1,5 @@ -use core::{future::poll_fn, task::Poll}; +use core::future::poll_fn; +use core::task::Poll; use std::sync::{Arc, Mutex}; use diatomic_waker::DiatomicWaker; diff --git a/libs/utils/src/tcp_listener.rs b/libs/utils/src/tcp_listener.rs index 6b35d3d63a..6a4a77127d 100644 --- a/libs/utils/src/tcp_listener.rs +++ b/libs/utils/src/tcp_listener.rs @@ -1,9 +1,8 @@ -use std::{ - io, - net::{TcpListener, ToSocketAddrs}, -}; +use std::io; +use std::net::{TcpListener, ToSocketAddrs}; -use nix::sys::socket::{setsockopt, sockopt::ReuseAddr}; +use nix::sys::socket::setsockopt; +use nix::sys::socket::sockopt::ReuseAddr; /// Bind a [`TcpListener`] to addr with `SO_REUSEADDR` set to true. 
pub fn bind(addr: A) -> io::Result { diff --git a/libs/utils/src/tracing_span_assert.rs b/libs/utils/src/tracing_span_assert.rs index add2fa7920..3d15e08400 100644 --- a/libs/utils/src/tracing_span_assert.rs +++ b/libs/utils/src/tracing_span_assert.rs @@ -172,16 +172,14 @@ fn tracing_subscriber_configured() -> bool { #[cfg(test)] mod tests { + use std::collections::HashSet; + use std::fmt::{self}; + use std::hash::{Hash, Hasher}; + use tracing_subscriber::prelude::*; use super::*; - use std::{ - collections::HashSet, - fmt::{self}, - hash::{Hash, Hasher}, - }; - struct MemoryIdentity<'a>(&'a dyn Extractor); impl MemoryIdentity<'_> { diff --git a/libs/utils/src/try_rcu.rs b/libs/utils/src/try_rcu.rs index 6b53ab1316..30540c27d0 100644 --- a/libs/utils/src/try_rcu.rs +++ b/libs/utils/src/try_rcu.rs @@ -44,10 +44,12 @@ where #[cfg(test)] mod tests { - use super::*; - use arc_swap::ArcSwap; use std::sync::Arc; + use arc_swap::ArcSwap; + + use super::*; + #[test] fn test_try_rcu_success() { let swap = ArcSwap::from(Arc::new(42)); diff --git a/libs/utils/src/vec_map.rs b/libs/utils/src/vec_map.rs index 1fe048c6f0..eded86af3e 100644 --- a/libs/utils/src/vec_map.rs +++ b/libs/utils/src/vec_map.rs @@ -1,4 +1,6 @@ -use std::{alloc::Layout, cmp::Ordering, ops::RangeBounds}; +use std::alloc::Layout; +use std::cmp::Ordering; +use std::ops::RangeBounds; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum VecMapOrdering { @@ -214,7 +216,8 @@ fn extract_key(entry: &(K, V)) -> &K { #[cfg(test)] mod tests { - use std::{collections::BTreeMap, ops::Bound}; + use std::collections::BTreeMap; + use std::ops::Bound; use super::{VecMap, VecMapOrdering}; diff --git a/libs/utils/src/zstd.rs b/libs/utils/src/zstd.rs index be2dcc00f5..96c2a83951 100644 --- a/libs/utils/src/zstd.rs +++ b/libs/utils/src/zstd.rs @@ -1,19 +1,14 @@ use std::io::SeekFrom; use anyhow::{Context, Result}; -use async_compression::{ - tokio::{bufread::ZstdDecoder, write::ZstdEncoder}, - zstd::CParameter, - Level, -}; 
+use async_compression::Level; +use async_compression::tokio::bufread::ZstdDecoder; +use async_compression::tokio::write::ZstdEncoder; +use async_compression::zstd::CParameter; use camino::Utf8Path; use nix::NixPath; -use tokio::{ - fs::{File, OpenOptions}, - io::AsyncBufRead, - io::AsyncSeekExt, - io::AsyncWriteExt, -}; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncBufRead, AsyncSeekExt, AsyncWriteExt}; use tokio_tar::{Archive, Builder, HeaderMode}; use walkdir::WalkDir; diff --git a/libs/utils/tests/bin_ser_test.rs b/libs/utils/tests/bin_ser_test.rs index b995b61b78..e0c8cdde00 100644 --- a/libs/utils/tests/bin_ser_test.rs +++ b/libs/utils/tests/bin_ser_test.rs @@ -1,7 +1,8 @@ +use std::io::Read; + use bytes::{Buf, BytesMut}; use hex_literal::hex; use serde::Deserialize; -use std::io::Read; use utils::bin_ser::LeSer; #[derive(Debug, PartialEq, Eq, Deserialize)] diff --git a/libs/wal_decoder/benches/bench_interpret_wal.rs b/libs/wal_decoder/benches/bench_interpret_wal.rs index 846904cf87..ed6ba4d267 100644 --- a/libs/wal_decoder/benches/bench_interpret_wal.rs +++ b/libs/wal_decoder/benches/bench_interpret_wal.rs @@ -1,23 +1,25 @@ -use anyhow::Context; -use criterion::{criterion_group, criterion_main, Criterion}; -use futures::{stream::FuturesUnordered, StreamExt}; -use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; -use postgres_ffi::{waldecoder::WalStreamDecoder, MAX_SEND_SIZE, WAL_SEGMENT_SIZE}; -use pprof::criterion::{Output, PProfProfiler}; -use serde::Deserialize; -use std::{env, num::NonZeroUsize, sync::Arc}; +use std::env; +use std::num::NonZeroUsize; +use std::sync::Arc; +use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use camino_tempfile::Utf8TempDir; +use criterion::{Criterion, criterion_group, criterion_main}; +use futures::StreamExt; +use futures::stream::FuturesUnordered; +use pageserver_api::shard::{ShardIdentity, ShardStripeSize}; +use postgres_ffi::waldecoder::WalStreamDecoder; +use postgres_ffi::{MAX_SEND_SIZE, 
WAL_SEGMENT_SIZE}; +use pprof::criterion::{Output, PProfProfiler}; use remote_storage::{ DownloadOpts, GenericRemoteStorage, ListingMode, RemoteStorageConfig, RemoteStorageKind, S3Config, }; +use serde::Deserialize; use tokio_util::sync::CancellationToken; -use utils::{ - lsn::Lsn, - shard::{ShardCount, ShardNumber}, -}; +use utils::lsn::Lsn; +use utils::shard::{ShardCount, ShardNumber}; use wal_decoder::models::InterpretedWalRecord; const S3_BUCKET: &str = "neon-github-public-dev"; @@ -31,7 +33,7 @@ const METADATA_FILENAME: &str = "metadata.json"; static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; #[allow(non_upper_case_globals)] -#[export_name = "malloc_conf"] +#[unsafe(export_name = "malloc_conf")] pub static malloc_conf: &[u8] = b"prof:true,prof_active:true,lg_prof_sample:20\0"; async fn create_s3_client() -> anyhow::Result> { diff --git a/libs/wal_decoder/src/decoder.rs b/libs/wal_decoder/src/decoder.rs index ebb38ceb52..cb0835e894 100644 --- a/libs/wal_decoder/src/decoder.rs +++ b/libs/wal_decoder/src/decoder.rs @@ -3,8 +3,6 @@ use std::collections::HashMap; -use crate::models::*; -use crate::serialized_batch::SerializedValueBatch; use bytes::{Buf, Bytes}; use pageserver_api::key::rel_block_to_key; use pageserver_api::reltag::{RelTag, SlruKind}; @@ -14,6 +12,9 @@ use postgres_ffi::relfile_utils::VISIBILITYMAP_FORKNUM; use postgres_ffi::walrecord::*; use utils::lsn::Lsn; +use crate::models::*; +use crate::serialized_batch::SerializedValueBatch; + impl InterpretedWalRecord { /// Decode and interpreted raw bytes which represent one Postgres WAL record. /// Data blocks which do not match any of the provided shard identities are filtered out. 
diff --git a/libs/wal_decoder/src/serialized_batch.rs b/libs/wal_decoder/src/serialized_batch.rs index d76f75f51f..b451d6d8e0 100644 --- a/libs/wal_decoder/src/serialized_batch.rs +++ b/libs/wal_decoder/src/serialized_batch.rs @@ -8,20 +8,18 @@ use std::collections::{BTreeSet, HashMap}; use bytes::{Bytes, BytesMut}; -use pageserver_api::key::rel_block_to_key; +use pageserver_api::key::{CompactKey, Key, rel_block_to_key}; use pageserver_api::keyspace::KeySpace; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::RelTag; use pageserver_api::shard::ShardIdentity; -use pageserver_api::{key::CompactKey, value::Value}; +use pageserver_api::value::Value; use postgres_ffi::walrecord::{DecodedBkpBlock, DecodedWALRecord}; -use postgres_ffi::{page_is_new, page_set_lsn, pg_constants, BLCKSZ}; +use postgres_ffi::{BLCKSZ, page_is_new, page_set_lsn, pg_constants}; use serde::{Deserialize, Serialize}; use utils::bin_ser::BeSer; use utils::lsn::Lsn; -use pageserver_api::key::Key; - use crate::models::InterpretedWalRecord; static ZERO_PAGE: Bytes = Bytes::from_static(&[0u8; BLCKSZ as usize]); @@ -515,10 +513,11 @@ impl SerializedValueBatch { let empty = self.raw.is_empty(); if cfg!(debug_assertions) && empty { - assert!(self - .metadata - .iter() - .all(|meta| matches!(meta, ValueMeta::Observed(_)))); + assert!( + self.metadata + .iter() + .all(|meta| matches!(meta, ValueMeta::Observed(_))) + ); } !empty diff --git a/libs/wal_decoder/src/wire_format.rs b/libs/wal_decoder/src/wire_format.rs index 52ed5c70b5..5a28128dd8 100644 --- a/libs/wal_decoder/src/wire_format.rs +++ b/libs/wal_decoder/src/wire_format.rs @@ -7,15 +7,12 @@ use utils::lsn::Lsn; use utils::postgres_client::{Compression, InterpretedFormat}; use crate::models::{ - FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords, MetadataRecord, + FlushUncommittedRecords, InterpretedWalRecord, InterpretedWalRecords, MetadataRecord, proto, }; - use crate::serialized_batch::{ ObservedValueMeta, 
SerializedValueBatch, SerializedValueMeta, ValueMeta, }; -use crate::models::proto; - #[derive(Debug, thiserror::Error)] pub enum ToWireFormatError { #[error("{0}")] @@ -83,8 +80,8 @@ impl ToWireFormat for InterpretedWalRecords { format: InterpretedFormat, compression: Option, ) -> Result { - use async_compression::tokio::write::ZstdEncoder; use async_compression::Level; + use async_compression::tokio::write::ZstdEncoder; let encode_res: Result = match format { InterpretedFormat::Bincode => { diff --git a/libs/walproposer/build.rs b/libs/walproposer/build.rs index 8d5b1ade35..530ceb1327 100644 --- a/libs/walproposer/build.rs +++ b/libs/walproposer/build.rs @@ -1,9 +1,11 @@ //! Links with walproposer, pgcommon, pgport and runs bindgen on walproposer.h //! to generate Rust bindings for it. -use std::{env, path::PathBuf, process::Command}; +use std::env; +use std::path::PathBuf; +use std::process::Command; -use anyhow::{anyhow, Context}; +use anyhow::{Context, anyhow}; const WALPROPOSER_PG_VERSION: &str = "v17"; diff --git a/libs/walproposer/src/api_bindings.rs b/libs/walproposer/src/api_bindings.rs index 2fbea3fe45..d660602149 100644 --- a/libs/walproposer/src/api_bindings.rs +++ b/libs/walproposer/src/api_bindings.rs @@ -3,27 +3,14 @@ #![allow(dead_code)] -use std::ffi::CStr; -use std::ffi::CString; +use std::ffi::{CStr, CString}; -use crate::bindings::uint32; -use crate::bindings::walproposer_api; -use crate::bindings::NeonWALReadResult; -use crate::bindings::PGAsyncReadResult; -use crate::bindings::PGAsyncWriteResult; -use crate::bindings::Safekeeper; -use crate::bindings::Size; -use crate::bindings::StringInfoData; -use crate::bindings::TimestampTz; -use crate::bindings::WalProposer; -use crate::bindings::WalProposerConnStatusType; -use crate::bindings::WalProposerConnectPollStatusType; -use crate::bindings::WalProposerExecStatusType; -use crate::bindings::WalproposerShmemState; -use crate::bindings::XLogRecPtr; -use crate::walproposer::ApiImpl; -use 
crate::walproposer::StreamingCallback; -use crate::walproposer::WaitResult; +use crate::bindings::{ + NeonWALReadResult, PGAsyncReadResult, PGAsyncWriteResult, Safekeeper, Size, StringInfoData, + TimestampTz, WalProposer, WalProposerConnStatusType, WalProposerConnectPollStatusType, + WalProposerExecStatusType, WalproposerShmemState, XLogRecPtr, uint32, walproposer_api, +}; +use crate::walproposer::{ApiImpl, StreamingCallback, WaitResult}; extern "C" fn get_shmem_state(wp: *mut WalProposer) -> *mut WalproposerShmemState { unsafe { diff --git a/libs/walproposer/src/walproposer.rs b/libs/walproposer/src/walproposer.rs index 60b606c64a..4e50c21fca 100644 --- a/libs/walproposer/src/walproposer.rs +++ b/libs/walproposer/src/walproposer.rs @@ -2,15 +2,15 @@ use std::ffi::CString; -use crate::{ - api_bindings::{create_api, take_vec_u8, Level}, - bindings::{ - NeonWALReadResult, Safekeeper, WalProposer, WalProposerBroadcast, WalProposerConfig, - WalProposerCreate, WalProposerFree, WalProposerPoll, WalProposerStart, - }, -}; use postgres_ffi::WAL_SEGMENT_SIZE; -use utils::{id::TenantTimelineId, lsn::Lsn}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; + +use crate::api_bindings::{Level, create_api, take_vec_u8}; +use crate::bindings::{ + NeonWALReadResult, Safekeeper, WalProposer, WalProposerBroadcast, WalProposerConfig, + WalProposerCreate, WalProposerFree, WalProposerPoll, WalProposerStart, +}; /// Rust high-level wrapper for C walproposer API. Many methods are not required /// for simple cases, hence todo!() in default implementations. 
@@ -275,22 +275,17 @@ impl StreamingCallback { #[cfg(test)] mod tests { use core::panic; - use std::{ - cell::Cell, - ffi::CString, - sync::{atomic::AtomicUsize, mpsc::sync_channel}, - }; + use std::cell::{Cell, UnsafeCell}; + use std::ffi::CString; + use std::sync::atomic::AtomicUsize; + use std::sync::mpsc::sync_channel; - use std::cell::UnsafeCell; use utils::id::TenantTimelineId; - use crate::{ - api_bindings::Level, - bindings::{NeonWALReadResult, PG_VERSION_NUM}, - walproposer::Wrapper, - }; - use super::ApiImpl; + use crate::api_bindings::Level; + use crate::bindings::{NeonWALReadResult, PG_VERSION_NUM}; + use crate::walproposer::Wrapper; #[derive(Clone, Copy, Debug)] struct WaitEventsData { diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index bb0f64ca32..f19b4e964d 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -1,17 +1,15 @@ -use std::{collections::HashMap, error::Error as _}; +use std::collections::HashMap; +use std::error::Error as _; use bytes::Bytes; -use reqwest::{IntoUrl, Method, StatusCode}; - use detach_ancestor::AncestorDetached; use http_utils::error::HttpErrorBody; -use pageserver_api::{models::*, shard::TenantShardId}; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; - +use pageserver_api::models::*; +use pageserver_api::shard::TenantShardId; pub use reqwest::Body as ReqwestBody; +use reqwest::{IntoUrl, Method, StatusCode}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; use crate::BlockUnblock; diff --git a/pageserver/client/src/page_service.rs b/pageserver/client/src/page_service.rs index 47da83b0eb..ef35ac2f48 100644 --- a/pageserver/client/src/page_service.rs +++ b/pageserver/client/src/page_service.rs @@ -1,23 +1,16 @@ use std::sync::{Arc, Mutex}; -use futures::{ - stream::{SplitSink, SplitStream}, - SinkExt, StreamExt, -}; -use pageserver_api::{ - models::{ - PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, - 
PagestreamGetPageResponse, - }, - reltag::RelTag, +use futures::stream::{SplitSink, SplitStream}; +use futures::{SinkExt, StreamExt}; +use pageserver_api::models::{ + PagestreamBeMessage, PagestreamFeMessage, PagestreamGetPageRequest, PagestreamGetPageResponse, }; +use pageserver_api::reltag::RelTag; use tokio::task::JoinHandle; use tokio_postgres::CopyOutStream; use tokio_util::sync::CancellationToken; -use utils::{ - id::{TenantId, TimelineId}, - lsn::Lsn, -}; +use utils::id::{TenantId, TimelineId}; +use utils::lsn::Lsn; pub struct Client { client: tokio_postgres::Client, diff --git a/pageserver/compaction/src/bin/compaction-simulator.rs b/pageserver/compaction/src/bin/compaction-simulator.rs index c308694ae1..dd35417333 100644 --- a/pageserver/compaction/src/bin/compaction-simulator.rs +++ b/pageserver/compaction/src/bin/compaction-simulator.rs @@ -1,11 +1,11 @@ -use clap::{Parser, Subcommand}; -use pageserver_compaction::helpers::PAGE_SZ; -use pageserver_compaction::simulator::MockTimeline; -use rand::Rng; use std::io::Write; use std::path::{Path, PathBuf}; use std::sync::OnceLock; +use clap::{Parser, Subcommand}; +use pageserver_compaction::helpers::PAGE_SZ; +use pageserver_compaction::simulator::MockTimeline; +use rand::Rng; use utils::project_git_version; project_git_version!(GIT_VERSION); @@ -157,8 +157,9 @@ async fn run_suite() -> anyhow::Result<()> { use std::fs::File; use std::io::Stdout; use std::sync::Mutex; -use tracing_subscriber::fmt::writer::EitherWriter; + use tracing_subscriber::fmt::MakeWriter; +use tracing_subscriber::fmt::writer::EitherWriter; static LOG_FILE: OnceLock>> = OnceLock::new(); fn get_log_output() -> &'static Mutex> { diff --git a/pageserver/compaction/src/compact_tiered.rs b/pageserver/compaction/src/compact_tiered.rs index 02b11910ce..75f43d7ff7 100644 --- a/pageserver/compaction/src/compact_tiered.rs +++ b/pageserver/compaction/src/compact_tiered.rs @@ -17,20 +17,19 @@ //! 
distance of image layers in LSN dimension is roughly equal to the logical //! database size. For example, if the logical database size is 10 GB, we would //! generate new image layers every 10 GB of WAL. -use futures::StreamExt; -use pageserver_api::shard::ShardIdentity; -use tracing::{debug, info}; - use std::collections::{HashSet, VecDeque}; use std::ops::Range; -use crate::helpers::{ - accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with, PAGE_SZ, -}; -use crate::interface::*; +use futures::StreamExt; +use pageserver_api::shard::ShardIdentity; +use tracing::{debug, info}; use utils::lsn::Lsn; +use crate::helpers::{ + PAGE_SZ, accum_key_values, keyspace_total_size, merge_delta_keys_buffered, overlaps_with, +}; use crate::identify_levels::identify_level; +use crate::interface::*; /// Main entry point to compaction. /// @@ -541,10 +540,11 @@ where } } // Open stream - let key_value_stream = - std::pin::pin!(merge_delta_keys_buffered::(deltas.as_slice(), ctx) + let key_value_stream = std::pin::pin!( + merge_delta_keys_buffered::(deltas.as_slice(), ctx) .await? - .map(Result::<_, anyhow::Error>::Ok)); + .map(Result::<_, anyhow::Error>::Ok) + ); let mut new_jobs = Vec::new(); // Slide a window through the keyspace diff --git a/pageserver/compaction/src/helpers.rs b/pageserver/compaction/src/helpers.rs index 7e4e3042b3..421802eef3 100644 --- a/pageserver/compaction/src/helpers.rs +++ b/pageserver/compaction/src/helpers.rs @@ -1,21 +1,21 @@ //! This file contains generic utility functions over the interface types, //! which could be handy for any compaction implementation. 
-use crate::interface::*; +use std::collections::{BinaryHeap, VecDeque}; +use std::fmt::Display; +use std::future::Future; +use std::ops::{DerefMut, Range}; +use std::pin::Pin; +use std::task::{Poll, ready}; use futures::future::BoxFuture; use futures::{Stream, StreamExt}; use itertools::Itertools; use pageserver_api::shard::ShardIdentity; use pin_project_lite::pin_project; -use std::collections::BinaryHeap; -use std::collections::VecDeque; -use std::fmt::Display; -use std::future::Future; -use std::ops::{DerefMut, Range}; -use std::pin::Pin; -use std::task::{ready, Poll}; use utils::lsn::Lsn; +use crate::interface::*; + pub const PAGE_SZ: u64 = 8192; pub fn keyspace_total_size( diff --git a/pageserver/compaction/src/identify_levels.rs b/pageserver/compaction/src/identify_levels.rs index e04bd15396..61575e3992 100644 --- a/pageserver/compaction/src/identify_levels.rs +++ b/pageserver/compaction/src/identify_levels.rs @@ -26,15 +26,15 @@ //! file size, the file will still be considered to be part of L0 at the next //! iteration. -use anyhow::bail; use std::collections::BTreeSet; use std::ops::Range; + +use anyhow::bail; +use tracing::{info, trace}; use utils::lsn::Lsn; use crate::interface::*; -use tracing::{info, trace}; - pub struct Level { pub lsn_range: Range, pub layers: Vec, @@ -60,7 +60,11 @@ where if l.lsn_range().start < end_lsn && l.lsn_range().end > end_lsn { // shouldn't happen. Indicates that the caller passed a bogus // end_lsn. - bail!("identify_level() called with end_lsn that does not partition the LSN space: end_lsn {} intersects with layer {}", end_lsn, l.short_id()); + bail!( + "identify_level() called with end_lsn that does not partition the LSN space: end_lsn {} intersects with layer {}", + end_lsn, + l.short_id() + ); } // include image layers sitting exacty at `end_lsn`. 
let is_image = !l.is_delta(); @@ -246,9 +250,10 @@ impl Level { #[cfg(test)] mod tests { + use std::sync::{Arc, Mutex}; + use super::*; use crate::simulator::{Key, MockDeltaLayer, MockImageLayer, MockLayer}; - use std::sync::{Arc, Mutex}; fn delta(key_range: Range, lsn_range: Range) -> MockLayer { MockLayer::Delta(Arc::new(MockDeltaLayer { diff --git a/pageserver/compaction/src/interface.rs b/pageserver/compaction/src/interface.rs index 92723faeaf..63fbc565cc 100644 --- a/pageserver/compaction/src/interface.rs +++ b/pageserver/compaction/src/interface.rs @@ -3,9 +3,12 @@ //! //! All the heavy lifting is done by the create_image and create_delta //! functions that the implementor provides. -use futures::Future; -use pageserver_api::{key::Key, keyspace::ShardedRange, shard::ShardIdentity}; use std::ops::Range; + +use futures::Future; +use pageserver_api::key::Key; +use pageserver_api::keyspace::ShardedRange; +use pageserver_api::shard::ShardIdentity; use utils::lsn::Lsn; /// Public interface. 
This is the main thing that the implementor needs to provide diff --git a/pageserver/compaction/src/simulator.rs b/pageserver/compaction/src/simulator.rs index 341fceba6f..bf9f6f2658 100644 --- a/pageserver/compaction/src/simulator.rs +++ b/pageserver/compaction/src/simulator.rs @@ -1,22 +1,17 @@ mod draw; -use draw::{LayerTraceEvent, LayerTraceFile, LayerTraceOp}; +use std::fmt::Write; +use std::ops::Range; +use std::sync::{Arc, Mutex}; +use draw::{LayerTraceEvent, LayerTraceFile, LayerTraceOp}; use futures::StreamExt; use pageserver_api::shard::ShardIdentity; use rand::Rng; use tracing::info; - use utils::lsn::Lsn; -use std::fmt::Write; -use std::ops::Range; -use std::sync::Arc; -use std::sync::Mutex; - -use crate::helpers::PAGE_SZ; -use crate::helpers::{merge_delta_keys, overlaps_with}; - +use crate::helpers::{PAGE_SZ, merge_delta_keys, overlaps_with}; use crate::interface; use crate::interface::CompactionLayer; diff --git a/pageserver/compaction/src/simulator/draw.rs b/pageserver/compaction/src/simulator/draw.rs index 4559db09f1..3d35d1b91e 100644 --- a/pageserver/compaction/src/simulator/draw.rs +++ b/pageserver/compaction/src/simulator/draw.rs @@ -1,14 +1,14 @@ -use super::Key; -use anyhow::Result; use std::cmp::Ordering; -use std::{ - collections::{BTreeMap, BTreeSet, HashSet}, - fmt::Write, - ops::Range, -}; -use svg_fmt::{rgb, BeginSvg, EndSvg, Fill, Stroke, Style}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::fmt::Write; +use std::ops::Range; + +use anyhow::Result; +use svg_fmt::{BeginSvg, EndSvg, Fill, Stroke, Style, rgb}; use utils::lsn::Lsn; +use super::Key; + // Map values to their compressed coordinate - the index the value // would have in a sorted and deduplicated list of all values. 
struct CoordinateMap { diff --git a/pageserver/ctl/src/draw_timeline_dir.rs b/pageserver/ctl/src/draw_timeline_dir.rs index 177e65ef79..80ca414543 100644 --- a/pageserver/ctl/src/draw_timeline_dir.rs +++ b/pageserver/ctl/src/draw_timeline_dir.rs @@ -50,18 +50,18 @@ //! ``` //! -use anyhow::{Context, Result}; -use pageserver_api::key::Key; use std::cmp::Ordering; +use std::collections::{BTreeMap, BTreeSet}; use std::io::{self, BufRead}; +use std::ops::Range; use std::path::PathBuf; use std::str::FromStr; -use std::{ - collections::{BTreeMap, BTreeSet}, - ops::Range, -}; -use svg_fmt::{rectangle, rgb, BeginSvg, EndSvg, Fill, Stroke}; -use utils::{lsn::Lsn, project_git_version}; + +use anyhow::{Context, Result}; +use pageserver_api::key::Key; +use svg_fmt::{BeginSvg, EndSvg, Fill, Stroke, rectangle, rgb}; +use utils::lsn::Lsn; +use utils::project_git_version; project_git_version!(GIT_VERSION); diff --git a/pageserver/ctl/src/key.rs b/pageserver/ctl/src/key.rs index c7f0719c41..600f7c412e 100644 --- a/pageserver/ctl/src/key.rs +++ b/pageserver/ctl/src/key.rs @@ -1,11 +1,10 @@ +use std::str::FromStr; + use anyhow::Context; use clap::Parser; -use pageserver_api::{ - key::Key, - reltag::{BlockNumber, RelTag, SlruKind}, - shard::{ShardCount, ShardStripeSize}, -}; -use std::str::FromStr; +use pageserver_api::key::Key; +use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; +use pageserver_api::shard::{ShardCount, ShardStripeSize}; #[derive(Parser)] pub(super) struct DescribeKeyCommand { @@ -394,7 +393,10 @@ mod tests { fn single_positional_spanalike_is_key_material() { // why is this needed? 
if you are checking many, then copypaste starts to appeal let strings = [ - (line!(), "2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0"), + ( + line!(), + "2024-05-15T15:33:49.873906Z ERROR page_service_conn_main{peer_addr=A:B}:process_query{tenant_id=C timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm blkno=2 req_lsn=0/238D98C8}: error reading relation or page version: Read error: could not find data for key 000000067F00032CE5000000000000000001 (shard ShardNumber(0)) at LSN 0/1D0A16C1, request LSN 0/238D98C8, ancestor 0/0", + ), (line!(), "rel=1663/208101/2620_fsm blkno=2"), (line!(), "rel=1663/208101/2620.1 blkno=2"), ]; @@ -420,7 +422,15 @@ mod tests { #[test] fn multiple_spanlike_args() { let strings = [ - (line!(), &["process_query{tenant_id=C", "timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm", "blkno=2", "req_lsn=0/238D98C8}"][..]), + ( + line!(), + &[ + "process_query{tenant_id=C", + "timeline_id=D}:handle_pagerequests:handle_get_page_at_lsn_request{rel=1663/208101/2620_fsm", + "blkno=2", + "req_lsn=0/238D98C8}", + ][..], + ), (line!(), &["rel=1663/208101/2620_fsm", "blkno=2"][..]), (line!(), &["1663/208101/2620_fsm", "2"][..]), ]; diff --git a/pageserver/ctl/src/layer_map_analyzer.rs b/pageserver/ctl/src/layer_map_analyzer.rs index 2c350d6d86..b426f977cf 100644 --- a/pageserver/ctl/src/layer_map_analyzer.rs +++ b/pageserver/ctl/src/layer_map_analyzer.rs @@ -2,27 +2,27 @@ //! //! Currently it only analyzes holes, which are regions within the layer range that the layer contains no updates for. 
In the future it might do more analysis (maybe key quantiles?) but it should never return sensitive data. -use anyhow::{anyhow, Result}; -use camino::{Utf8Path, Utf8PathBuf}; -use pageserver::context::{DownloadBehavior, RequestContext}; -use pageserver::task_mgr::TaskKind; -use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; -use pageserver::virtual_file::api::IoMode; use std::cmp::Ordering; use std::collections::BinaryHeap; use std::ops::Range; use std::str::FromStr; use std::{fs, str}; +use anyhow::{Result, anyhow}; +use camino::{Utf8Path, Utf8PathBuf}; +use pageserver::context::{DownloadBehavior, RequestContext}; use pageserver::page_cache::{self, PAGE_SZ}; +use pageserver::task_mgr::TaskKind; use pageserver::tenant::block_io::FileBlockReader; use pageserver::tenant::disk_btree::{DiskBtreeReader, VisitDirection}; -use pageserver::tenant::storage_layer::delta_layer::{Summary, DELTA_KEY_SIZE}; -use pageserver::tenant::storage_layer::{range_overlaps, LayerName}; +use pageserver::tenant::storage_layer::delta_layer::{DELTA_KEY_SIZE, Summary}; +use pageserver::tenant::storage_layer::{LayerName, range_overlaps}; +use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; +use pageserver::virtual_file::api::IoMode; use pageserver::virtual_file::{self, VirtualFile}; -use pageserver_api::key::{Key, KEY_SIZE}; - -use utils::{bin_ser::BeSer, lsn::Lsn}; +use pageserver_api::key::{KEY_SIZE, Key}; +use utils::bin_ser::BeSer; +use utils::lsn::Lsn; use crate::AnalyzeLayerMapCmd; diff --git a/pageserver/ctl/src/layers.rs b/pageserver/ctl/src/layers.rs index 4c2c3ab30e..05fb35ff09 100644 --- a/pageserver/ctl/src/layers.rs +++ b/pageserver/ctl/src/layers.rs @@ -1,3 +1,4 @@ +use std::fs::{self, File}; use std::path::{Path, PathBuf}; use anyhow::Result; @@ -5,12 +6,10 @@ use camino::{Utf8Path, Utf8PathBuf}; use clap::Subcommand; use pageserver::context::{DownloadBehavior, RequestContext}; use pageserver::task_mgr::TaskKind; -use 
pageserver::tenant::storage_layer::{delta_layer, image_layer}; -use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer}; +use pageserver::tenant::storage_layer::{DeltaLayer, ImageLayer, delta_layer, image_layer}; use pageserver::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; use pageserver::virtual_file::api::IoMode; use pageserver::{page_cache, virtual_file}; -use std::fs::{self, File}; use utils::id::{TenantId, TimelineId}; use crate::layer_map_analyzer::parse_filename; diff --git a/pageserver/ctl/src/main.rs b/pageserver/ctl/src/main.rs index 353b4bd2f9..72a120a69b 100644 --- a/pageserver/ctl/src/main.rs +++ b/pageserver/ctl/src/main.rs @@ -11,33 +11,29 @@ mod layer_map_analyzer; mod layers; mod page_trace; -use page_trace::PageTraceCmd; -use std::{ - str::FromStr, - time::{Duration, SystemTime}, -}; +use std::str::FromStr; +use std::time::{Duration, SystemTime}; use camino::{Utf8Path, Utf8PathBuf}; use clap::{Parser, Subcommand}; use index_part::IndexPartCmd; use layers::LayerCmd; -use pageserver::{ - context::{DownloadBehavior, RequestContext}, - page_cache, - task_mgr::TaskKind, - tenant::{dump_layerfile_from_path, metadata::TimelineMetadata}, - virtual_file::{self, api::IoMode}, -}; +use page_trace::PageTraceCmd; +use pageserver::context::{DownloadBehavior, RequestContext}; +use pageserver::page_cache; +use pageserver::task_mgr::TaskKind; +use pageserver::tenant::dump_layerfile_from_path; +use pageserver::tenant::metadata::TimelineMetadata; +use pageserver::virtual_file::api::IoMode; +use pageserver::virtual_file::{self}; use pageserver_api::shard::TenantShardId; use postgres_ffi::ControlFileData; use remote_storage::{RemotePath, RemoteStorageConfig}; use tokio_util::sync::CancellationToken; -use utils::{ - id::TimelineId, - logging::{self, LogFormat, TracingErrorLayerEnablement}, - lsn::Lsn, - project_git_version, -}; +use utils::id::TimelineId; +use utils::logging::{self, LogFormat, TracingErrorLayerEnablement}; +use utils::lsn::Lsn; +use 
utils::project_git_version; project_git_version!(GIT_VERSION); @@ -355,7 +351,9 @@ mod tests { assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines"); assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001-0004/timelines"); assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/"); - assert_valid("pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683"); + assert_valid( + "pageserver/v1/tenants/3aa8fcc61f6d357410b7de754b1d9001/timelines/641e5342083b2235ee3deb8066819683", + ); assert_eq!(validate_prefix("pageserver/v1/tenants/"), None); } } diff --git a/pageserver/pagebench/src/cmd/aux_files.rs b/pageserver/pagebench/src/cmd/aux_files.rs index b869a0c6c7..bab17540f5 100644 --- a/pageserver/pagebench/src/cmd/aux_files.rs +++ b/pageserver/pagebench/src/cmd/aux_files.rs @@ -1,12 +1,12 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::Instant; + use pageserver_api::models::{TenantConfig, TenantConfigRequest}; use pageserver_api::shard::TenantShardId; use utils::id::TenantTimelineId; use utils::lsn::Lsn; -use std::collections::HashMap; -use std::sync::Arc; -use std::time::Instant; - /// Ingest aux files into the pageserver. 
#[derive(clap::Parser)] pub(crate) struct Args { diff --git a/pageserver/pagebench/src/cmd/basebackup.rs b/pageserver/pagebench/src/cmd/basebackup.rs index 3ae6d99aa7..51d7d5df89 100644 --- a/pageserver/pagebench/src/cmd/basebackup.rs +++ b/pageserver/pagebench/src/cmd/basebackup.rs @@ -1,16 +1,3 @@ -use anyhow::Context; -use pageserver_api::shard::TenantShardId; -use pageserver_client::mgmt_api::ForceAwaitLogicalSize; -use pageserver_client::page_service::BasebackupRequest; - -use utils::id::TenantTimelineId; -use utils::lsn::Lsn; - -use rand::prelude::*; -use tokio::sync::Barrier; -use tokio::task::JoinSet; -use tracing::{info, instrument}; - use std::collections::HashMap; use std::num::NonZeroUsize; use std::ops::Range; @@ -18,6 +5,17 @@ use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; use std::sync::{Arc, Mutex}; use std::time::Instant; +use anyhow::Context; +use pageserver_api::shard::TenantShardId; +use pageserver_client::mgmt_api::ForceAwaitLogicalSize; +use pageserver_client::page_service::BasebackupRequest; +use rand::prelude::*; +use tokio::sync::Barrier; +use tokio::task::JoinSet; +use tracing::{info, instrument}; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; + use crate::util::tokio_thread_local_stats::AllThreadLocalStats; use crate::util::{request_stats, tokio_thread_local_stats}; diff --git a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs index a60efc7567..617676c079 100644 --- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs +++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs @@ -1,18 +1,3 @@ -use anyhow::Context; -use camino::Utf8PathBuf; -use pageserver_api::key::Key; -use pageserver_api::keyspace::KeySpaceAccum; -use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest}; - -use pageserver_api::shard::TenantShardId; -use tokio_util::sync::CancellationToken; -use utils::id::TenantTimelineId; -use utils::lsn::Lsn; - -use rand::prelude::*; -use 
tokio::task::JoinSet; -use tracing::info; - use std::collections::{HashSet, VecDeque}; use std::future::Future; use std::num::NonZeroUsize; @@ -21,6 +6,19 @@ use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::{Arc, Mutex}; use std::time::{Duration, Instant}; +use anyhow::Context; +use camino::Utf8PathBuf; +use pageserver_api::key::Key; +use pageserver_api::keyspace::KeySpaceAccum; +use pageserver_api::models::{PagestreamGetPageRequest, PagestreamRequest}; +use pageserver_api::shard::TenantShardId; +use rand::prelude::*; +use tokio::task::JoinSet; +use tokio_util::sync::CancellationToken; +use tracing::info; +use utils::id::TenantTimelineId; +use utils::lsn::Lsn; + use crate::util::tokio_thread_local_stats::AllThreadLocalStats; use crate::util::{request_stats, tokio_thread_local_stats}; diff --git a/pageserver/pagebench/src/cmd/ondemand_download_churn.rs b/pageserver/pagebench/src/cmd/ondemand_download_churn.rs index 1bb71b9353..3194e2e753 100644 --- a/pageserver/pagebench/src/cmd/ondemand_download_churn.rs +++ b/pageserver/pagebench/src/cmd/ondemand_download_churn.rs @@ -1,23 +1,19 @@ -use pageserver_api::{models::HistoricLayerInfo, shard::TenantShardId}; +use std::f64; +use std::num::NonZeroUsize; +use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; +use pageserver_api::models::HistoricLayerInfo; +use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use rand::seq::SliceRandom; +use tokio::sync::{OwnedSemaphorePermit, mpsc}; +use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::{debug, info}; use utils::id::{TenantTimelineId, TimelineId}; -use std::{f64, sync::Arc}; -use tokio::{ - sync::{mpsc, OwnedSemaphorePermit}, - task::JoinSet, -}; - -use std::{ - num::NonZeroUsize, - sync::atomic::{AtomicU64, Ordering}, - time::{Duration, Instant}, -}; - /// Evict & on-demand download random layers. 
#[derive(clap::Parser)] pub(crate) struct Args { diff --git a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs index f07beeecfd..16abbf9ffd 100644 --- a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs +++ b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs @@ -2,11 +2,10 @@ use std::sync::Arc; use humantime::Duration; use pageserver_api::shard::TenantShardId; +use pageserver_client::mgmt_api::ForceAwaitLogicalSize; use tokio::task::JoinSet; use utils::id::TenantTimelineId; -use pageserver_client::mgmt_api::ForceAwaitLogicalSize; - #[derive(clap::Parser)] pub(crate) struct Args { #[clap(long, default_value = "http://localhost:9898")] diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs index 5c305769dd..0e92e87103 100644 --- a/safekeeper/client/src/mgmt_api.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -3,17 +3,16 @@ //! Partially copied from pageserver client; some parts might be better to be //! united. 
+use std::error::Error as _; + use http_utils::error::HttpErrorBody; use reqwest::{IntoUrl, Method, StatusCode}; use safekeeper_api::models::{ PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, TimelineStatus, }; -use std::error::Error as _; -use utils::{ - id::{NodeId, TenantId, TimelineId}, - logging::SecretString, -}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::logging::SecretString; #[derive(Debug, Clone)] pub struct Client { diff --git a/storage_controller/client/src/control_api.rs b/storage_controller/client/src/control_api.rs index f8a2790769..7888b18aa7 100644 --- a/storage_controller/client/src/control_api.rs +++ b/storage_controller/client/src/control_api.rs @@ -1,6 +1,7 @@ use pageserver_client::mgmt_api::{self, ResponseErrorMessageExt}; use reqwest::{Method, Url}; -use serde::{de::DeserializeOwned, Serialize}; +use serde::Serialize; +use serde::de::DeserializeOwned; pub struct Client { base_url: Url, diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 8671e340bd..9a3e042c24 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1856,7 +1856,7 @@ impl Service { } Ok(AttachHookResponse { - r#gen: attach_req + generation: attach_req .node_id .map(|_| tenant_shard.generation.expect("Test hook, not used on tenants that are mid-onboarding with a NULL generation").into().unwrap()), }) From 3a3d62dc4fae962b99d451078829dfd007b94a0e Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Thu, 27 Feb 2025 11:45:13 +0100 Subject: [PATCH 044/207] Bodobolero/test cum stats persistence (#10995) ## Problem So far cumulative statistics have not been persisted when Neon scales to zero (suspends endpoint). 
With PR https://github.com/neondatabase/neon/pull/6560 the cumulative statistics should now survive endpoint restarts and correctly trigger the auto- vacuum and auto analyze maintenance So far we did not have a testcase that validates that improvement in our dev cloud environment with a real project. ## Summary of changes Introduce testcase `test_cumulative_statistics_persistence`in the benchmarking workflow running daily to verify: - Verifies that the cumulative statistics are correctly persisted across restarts. - Cumulative statistics are important to persist across restarts because they are used - when auto-vacuum an auto-analyze trigger conditions are met. - The test performs the following steps: - Seed a new project using pgbench - insert tuples that by itself are not enough to trigger auto-vacuum - suspend the endpoint - resume the endpoint - insert additional tuples that by itself are not enough to trigger auto-vacuum but in combination with the previous tuples are - verify that autovacuum is triggered by the combination of tuples inserted before and after endpoint suspension ## Test run https://github.com/neondatabase/neon/actions/runs/13546879714/job/37860609089#step:6:282 --- .github/workflows/benchmarking.yml | 56 +++++ .../test_cumulative_statistics_persistence.py | 221 ++++++++++++++++++ 2 files changed, 277 insertions(+) create mode 100644 test_runner/performance/test_cumulative_statistics_persistence.py diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index dd4941bb21..ffb6c65af9 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -140,6 +140,7 @@ jobs: --ignore test_runner/performance/test_logical_replication.py --ignore test_runner/performance/test_physical_replication.py --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py + --ignore test_runner/performance/test_cumulative_statistics_persistence.py env: BENCHMARK_CONNSTR: ${{ 
steps.create-neon-project.outputs.dsn }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" @@ -171,6 +172,61 @@ jobs: env: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + cumstats-test: + if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} + permissions: + contents: write + statuses: write + id-token: write # aws-actions/configure-aws-credentials + env: + POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install + DEFAULT_PG_VERSION: 17 + TEST_OUTPUT: /tmp/test_output + BUILD_TYPE: remote + SAVE_PERF_REPORT: ${{ github.event.inputs.save_perf_report || ( github.ref_name == 'main' ) }} + PLATFORM: "neon-staging" + + runs-on: [ self-hosted, us-east-2, x64 ] + container: + image: neondatabase/build-tools:pinned-bookworm + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + options: --init + + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact + path: /tmp/neon/ + prefix: latest + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + + - name: Verify that cumulative statistics are preserved + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance/test_cumulative_statistics_persistence.py + run_in_parallel: false + save_perf_report: ${{ env.SAVE_PERF_REPORT }} + extra_params: -m remote_cluster --timeout 3600 + pg_version: ${{ env.DEFAULT_PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" 
+ NEON_API_KEY: ${{ secrets.NEON_STAGING_API_KEY }} + replication-tests: if: ${{ github.event.inputs.run_only_pgvector_tests == 'false' || github.event.inputs.run_only_pgvector_tests == null }} permissions: diff --git a/test_runner/performance/test_cumulative_statistics_persistence.py b/test_runner/performance/test_cumulative_statistics_persistence.py new file mode 100644 index 0000000000..061467bbad --- /dev/null +++ b/test_runner/performance/test_cumulative_statistics_persistence.py @@ -0,0 +1,221 @@ +import math # Add this import +import time +import traceback +from pathlib import Path + +import psycopg2 +import psycopg2.extras +import pytest +from fixtures.benchmark_fixture import NeonBenchmarker +from fixtures.log_helper import log +from fixtures.neon_api import NeonAPI, connection_parameters_to_env +from fixtures.neon_fixtures import PgBin +from fixtures.pg_version import PgVersion + +vacuum_times_sql = """ +SELECT + relname AS table_name, + last_autovacuum, + last_autoanalyze +FROM + pg_stat_user_tables where relname = 'pgbench_accounts' +ORDER BY + last_autovacuum DESC, last_autoanalyze DESC +""" + + +def insert_first_chunk_and_verify_autovacuum_is_not_running( + cur, rows_to_insert, autovacuum_naptime +): + cur.execute(f""" + INSERT INTO pgbench_accounts (aid, bid, abalance, filler) + SELECT + aid, + (random() * 10)::int + 1 AS bid, + (random() * 10000)::int AS abalance, + 'filler text' AS filler + FROM generate_series(6800001, {6800001 + rows_to_insert - 1}) AS aid; + """) + assert cur.rowcount == rows_to_insert + for _ in range(5): + time.sleep(0.5 * autovacuum_naptime) + cur.execute(vacuum_times_sql) + row = cur.fetchall()[0] + log.info(f"last_autovacuum: {row[1]}, last_autoanalyze: {row[2]}") + assert row[1] is None + + +def insert_second_chunk_and_verify_autovacuum_is_now_running( + cur, rows_to_insert, autovacuum_naptime +): + cur.execute(f""" + INSERT INTO pgbench_accounts (aid, bid, abalance, filler) + SELECT + aid, + (random() * 10)::int + 1 AS 
bid, + (random() * 10000)::int AS abalance, + 'filler text' AS filler + FROM generate_series({6800001 + rows_to_insert}, {6800001 + rows_to_insert * 2 - 1}) AS aid; + """) + assert cur.rowcount == rows_to_insert + for _ in range(5): + time.sleep(0.5 * autovacuum_naptime) + cur.execute(vacuum_times_sql) + row = cur.fetchall()[0] + log.info(f"last_autovacuum: {row[1]}, last_autoanalyze: {row[2]}") + assert row[1] is not None + + +@pytest.mark.remote_cluster +@pytest.mark.timeout(60 * 60) +def test_cumulative_statistics_persistence( + pg_bin: PgBin, + test_output_dir: Path, + neon_api: NeonAPI, + pg_version: PgVersion, + zenbenchmark: NeonBenchmarker, +): + """ + Verifies that the cumulative statistics are correctly persisted across restarts. + Cumulative statistics are important to persist across restarts because they are used + when auto-vacuum and auto-analyze trigger conditions are met. + The test performs the following steps: + - Seed a new project using pgbench + - insert tuples that by itself are not enough to trigger auto-vacuum + - suspend the endpoint + - resume the endpoint + - insert additional tuples that by itself are not enough to trigger auto-vacuum but in combination with the previous tuples are + - verify that autovacuum is triggered by the combination of tuples inserted before and after endpoint suspension + """ + project = neon_api.create_project(pg_version) + project_id = project["project"]["id"] + neon_api.wait_for_operation_to_finish(project_id) + endpoint_id = project["endpoints"][0]["id"] + region_id = project["project"]["region_id"] + log.info(f"Created project {project_id} with endpoint {endpoint_id} in region {region_id}") + error_occurred = False + try: + connstr = project["connection_uris"][0]["connection_uri"] + env = connection_parameters_to_env(project["connection_uris"][0]["connection_parameters"]) + # seed about 1 GiB of data into pgbench_accounts + pg_bin.run_capture(["pgbench", "-i", "-s68"], env=env) + + # assert rows in
pgbench_accounts is 6800000 rows + conn = psycopg2.connect(connstr) + conn.autocommit = True + with conn.cursor() as cur: + # assert rows in pgbench_accounts is 6800000 rows + cur.execute("select count(*) from pgbench_accounts") + row_count = cur.fetchall()[0][0] + assert row_count == 6800000 + + # verify n_tup_ins, n_live_tup, vacuum_count, analyze_count (manual vacuum and analyze) + cur.execute( + "select n_tup_ins, vacuum_count,analyze_count from pg_stat_user_tables where relname = 'pgbench_accounts'" + ) + row = cur.fetchall()[0] + assert row[0] == 6800000 # n_tup_ins + assert row[1] == 1 # vacuum_count + assert row[2] == 1 # analyze_count + + # retrieve some GUCs (postgres settings) relevant to autovacuum + cur.execute( + "SELECT setting::int AS autovacuum_naptime FROM pg_settings WHERE name = 'autovacuum_naptime'" + ) + autovacuum_naptime = cur.fetchall()[0][0] + assert autovacuum_naptime < 300 and autovacuum_naptime > 0 + cur.execute( + "SELECT setting::float AS autovacuum_vacuum_insert_scale_factor FROM pg_settings WHERE name = 'autovacuum_vacuum_insert_scale_factor'" + ) + autovacuum_vacuum_insert_scale_factor = cur.fetchall()[0][0] + assert ( + autovacuum_vacuum_insert_scale_factor > 0.05 + and autovacuum_vacuum_insert_scale_factor < 1.0 + ) + cur.execute( + "SELECT setting::int AS autovacuum_vacuum_insert_threshold FROM pg_settings WHERE name = 'autovacuum_vacuum_insert_threshold'" + ) + autovacuum_vacuum_insert_threshold = cur.fetchall()[0][0] + cur.execute( + "SELECT setting::int AS pgstat_file_size_limit FROM pg_settings WHERE name = 'neon.pgstat_file_size_limit'" + ) + pgstat_file_size_limit = cur.fetchall()[0][0] + assert pgstat_file_size_limit > 10 * 1024 # at least 10 MB + + # insert rows that by itself are not enough to trigger auto-vacuum + # vacuum insert threshold = vacuum base insert threshold + vacuum insert scale factor * number of tuples + # https://www.postgresql.org/docs/17/routine-vacuuming.html + rows_to_insert = int( + math.ceil( + 
autovacuum_vacuum_insert_threshold / 2 + + row_count * autovacuum_vacuum_insert_scale_factor * 0.6 + ) + ) + + log.info( + f"autovacuum_vacuum_insert_scale_factor: {autovacuum_vacuum_insert_scale_factor}, autovacuum_vacuum_insert_threshold: {autovacuum_vacuum_insert_threshold}, row_count: {row_count}" + ) + log.info( + f"Inserting {rows_to_insert} rows, which is below the 'vacuum insert threshold'" + ) + + insert_first_chunk_and_verify_autovacuum_is_not_running( + cur, rows_to_insert, autovacuum_naptime + ) + + conn.close() + + # suspend the endpoint + log.info(f"Suspending endpoint {endpoint_id}") + neon_api.suspend_endpoint(project_id, endpoint_id) + neon_api.wait_for_operation_to_finish(project_id) + time.sleep(60) # give some time in between suspend and resume + + # resume the endpoint + log.info(f"Starting endpoint {endpoint_id}") + neon_api.start_endpoint(project_id, endpoint_id) + neon_api.wait_for_operation_to_finish(project_id) + + conn = psycopg2.connect(connstr) + conn.autocommit = True + with conn.cursor() as cur: + # insert additional rows that by itself are not enough to trigger auto-vacuum, but in combination + # with the previous rows inserted before the suspension are + log.info( + f"Inserting another {rows_to_insert} rows, which is below the 'vacuum insert threshold'" + ) + insert_second_chunk_and_verify_autovacuum_is_now_running( + cur, rows_to_insert, autovacuum_naptime + ) + + # verify estimated number of tuples in pgbench_accounts is within 6800000 + inserted rows +- 2 % + cur.execute( + "select reltuples::bigint from pg_class where relkind = 'r' and relname = 'pgbench_accounts'" + ) + reltuples = cur.fetchall()[0][0] + assert reltuples > 6800000 + rows_to_insert * 2 * 0.98 + assert reltuples < 6800000 + rows_to_insert * 2 * 1.02 + + # verify exact number of pgbench_accounts rows (computed row_count) + cur.execute("select count(*) from pgbench_accounts") + row_count = cur.fetchall()[0][0] + assert row_count == 6800000 + rows_to_insert * 2 + + #
verify n_tup_ins, n_live_tup, vacuum_count, analyze_count (manual vacuum and analyze) + cur.execute( + "select n_tup_ins, vacuum_count,analyze_count from pg_stat_user_tables where relname = 'pgbench_accounts'" + ) + row = cur.fetchall()[0] + assert row[0] == 6800000 + rows_to_insert * 2 + assert row[1] == 1 + assert row[2] == 1 + + conn.close() + + except Exception as e: + error_occurred = True + log.error(f"Caught exception: {e}") + log.error(traceback.format_exc()) + finally: + assert not error_occurred # Fail the test if an error occurred + neon_api.delete_project(project_id) From e35f7758d895c3fa93ea3d415f35a93de10b988b Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 27 Feb 2025 11:59:43 +0100 Subject: [PATCH 045/207] impr(controller_upcall_client): clean up copy-pasta code & add context to retries (#10991) Before this PR, re-attach and validate would log the same warning ``` calling control plane generation validation API failed ``` on retry errors. This can be confusing. This PR makes the message generically valid for any upcall and adds additional tracing spans to capture context. Along the way, clean up some copy-pasta variable naming. 
refs - https://github.com/neondatabase/neon/issues/10381#issuecomment-2684755827 --------- Co-authored-by: Alexander Lakhin --- pageserver/src/controller_upcall_client.rs | 14 ++++++++------ test_runner/fixtures/pageserver/allowed_errors.py | 4 +++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index 8462594607..6d5c727958 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -84,6 +84,7 @@ impl ControllerUpcallClient { }) } + #[tracing::instrument(skip_all)] async fn retry_http_forever( &self, url: &url::Url, @@ -108,7 +109,7 @@ impl ControllerUpcallClient { |_| false, 3, u32::MAX, - "calling control plane generation validation API", + "storage controller upcall", &self.cancel, ) .await @@ -125,11 +126,12 @@ impl ControllerUpcallClient { impl ControlPlaneGenerationsApi for ControllerUpcallClient { /// Block until we get a successful response, or error out if we are shut down + #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have context async fn re_attach( &self, conf: &PageServerConf, ) -> Result, RetryForeverError> { - let re_attach_path = self + let url = self .base_url .join("re-attach") .expect("Failed to build re-attach path"); @@ -205,7 +207,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { register: register.clone(), }; - let response: ReAttachResponse = self.retry_http_forever(&re_attach_path, request).await?; + let response: ReAttachResponse = self.retry_http_forever(&url, request).await?; tracing::info!( "Received re-attach response with {} tenants (node {}, register: {:?})", response.tenants.len(), @@ -223,11 +225,12 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { } /// Block until we get a successful response, or error out if we are shut down + #[tracing::instrument(skip_all)] // so that warning logs from retry_http_forever have 
context async fn validate( &self, tenants: Vec<(TenantShardId, Generation)>, ) -> Result, RetryForeverError> { - let re_attach_path = self + let url = self .base_url .join("validate") .expect("Failed to build validate path"); @@ -257,8 +260,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { return Err(RetryForeverError::ShuttingDown); } - let response: ValidateResponse = - self.retry_http_forever(&re_attach_path, request).await?; + let response: ValidateResponse = self.retry_http_forever(&url, request).await?; for rt in response.tenants { result.insert(rt.id, rt.valid); } diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index 748ac0d569..4fce558840 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -94,7 +94,9 @@ DEFAULT_PAGESERVER_ALLOWED_ERRORS = ( ".*Flushed oversized open layer with size.*", # During teardown, we stop the storage controller before the pageservers, so pageservers # can experience connection errors doing background deletion queue work. - ".*WARN deletion backend: calling control plane generation validation API failed.*error sending request.*", + ".*WARN deletion backend:.* storage controller upcall failed, will retry.*error sending request.*", + # Can happen when the pageserver starts faster than the storage controller + ".*WARN init_tenant_mgr:.* storage controller upcall failed, will retry.*error sending request.*", # Can happen when the test shuts down the storage controller while it is calling the utilization API ".*WARN.*path=/v1/utilization .*request was dropped before completing", # Can happen during shutdown From 93b59e65a2dcc6c03e01a389c4e86dee0086e48d Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 27 Feb 2025 13:56:28 +0100 Subject: [PATCH 046/207] pageserver: remove stale comment (#11016) No longer true now that we eagerly notify the compaction loop. 
--- pageserver/src/tenant/timeline.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 3164cdbdd2..662088fbde 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -4212,10 +4212,6 @@ impl Timeline { // Stall flushes to backpressure if compaction can't keep up. This is propagated up // to WAL ingestion by having ephemeral layer rolls wait for flushes. - // - // NB: the compaction loop only checks `compaction_threshold` every 20 seconds, so - // we can end up stalling before compaction even starts. Consider making it more - // responsive (e.g. via `watch_level0_deltas`). if let Some(stall_threshold) = self.get_l0_flush_stall_threshold() { if l0_count >= stall_threshold { warn!( From ad3719974537a9b07be293bac0f3c8db14096035 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Thu, 27 Feb 2025 14:45:18 +0100 Subject: [PATCH 047/207] Separate the upgrade tests in timelines (#10974) ## Problem We created extensions in a single database. The tests could interfere, i.e., discover some service tables left by other extensions and produce unexpected results. ## Summary of changes The tests are now run in a separate timeline, so only one extension owns the database, which prevents interference. 
--- .../ext-src/pgtap-src/test-upgrade.patch | 2 +- docker-compose/test_extensions_upgrade.sh | 69 ++++++++++++------- 2 files changed, 46 insertions(+), 25 deletions(-) diff --git a/docker-compose/ext-src/pgtap-src/test-upgrade.patch b/docker-compose/ext-src/pgtap-src/test-upgrade.patch index a4c46e93ce..c050ab8d00 100644 --- a/docker-compose/ext-src/pgtap-src/test-upgrade.patch +++ b/docker-compose/ext-src/pgtap-src/test-upgrade.patch @@ -7,7 +7,7 @@ index f255fe6..0a0fa65 100644 GENERATED_SCHEDULE_DEPS = $(TB_DIR)/all_tests $(TB_DIR)/exclude_tests REGRESS = --schedule $(TB_DIR)/run.sch # Set this again just to be safe -REGRESS_OPTS = --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF) -+REGRESS_OPTS = --use-existing --dbname=pgtap_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF) ++REGRESS_OPTS = --use-existing --dbname=contrib_regression --inputdir=test --max-connections=$(PARALLEL_CONN) --schedule $(SETUP_SCH) $(REGRESS_CONF) SETUP_SCH = test/schedule/main.sch # schedule to use for test setup; this can be forcibly changed by some targets! IGNORE_TESTS = $(notdir $(EXCLUDE_TEST_FILES:.sql=)) PARALLEL_TESTS = $(filter-out $(IGNORE_TESTS),$(filter-out $(SERIAL_TESTS),$(ALL_TESTS))) diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index c399109eb9..6e6c41538d 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -12,6 +12,7 @@ if [ -z ${OLD_COMPUTE_TAG+x} ] || [ -z ${NEW_COMPUTE_TAG+x} ] || [ -z "${OLD_COM fi export PG_VERSION=${PG_VERSION:-16} export PG_TEST_VERSION=${PG_VERSION} +# Waits for compute node is ready function wait_for_ready { TIME=0 while ! docker compose logs compute_is_ready | grep -q "accepting connections" && [ ${TIME} -le 300 ] ; do @@ -23,11 +24,45 @@ function wait_for_ready { exit 2 fi } +# Creates extensions. 
Gets a string with space-separated extensions as a parameter function create_extensions() { for ext in ${1}; do docker compose exec neon-test-extensions psql -X -v ON_ERROR_STOP=1 -d contrib_regression -c "CREATE EXTENSION IF NOT EXISTS ${ext} CASCADE" done } +# Creates a new timeline. Gets the parent ID and an extension name as parameters. +# Saves the timeline ID in the variable EXT_TIMELINE +function create_timeline() { + generate_id new_timeline_id + + PARAMS=( + -sbf + -X POST + -H "Content-Type: application/json" + -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${1}\"}" + "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/" + ) + result=$(curl "${PARAMS[@]}") + echo $result | jq . + EXT_TIMELINE[${2}]=${new_timeline_id} +} +# Checks if the timeline ID of the compute node is expected. Gets the timeline ID as a parameter +function check_timeline() { + TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") + if [ "${TID}" != "${1}" ]; then + echo Timeline mismatch + exit 1 + fi +} +# Restarts the compute node with the required compute tag and timeline. +# Accepts the tag for the compute node and the timeline as parameters. 
+function restart_compute() { + docker compose down compute compute_is_ready + COMPUTE_TAG=${1} TAG=${OLD_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready + wait_for_ready + check_timeline ${2} +} +declare -A EXT_TIMELINE EXTENSIONS='[ {"extname": "plv8", "extdir": "plv8-src"}, {"extname": "vector", "extdir": "pgvector-src"}, @@ -47,7 +82,7 @@ EXTENSIONS='[ {"extname": "pg_repack", "extdir": "pg_repack-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) -COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d +TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" @@ -55,12 +90,14 @@ create_extensions "${EXTNAMES}" query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')" new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query") docker compose --profile test-extensions down -COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate +TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" -docker compose exec neon-test-extensions psql -c "CREATE DATABASE pgtap_regression" -docker compose exec neon-test-extensions psql -d pgtap_regression -c "CREATE EXTENSION pgtap" +tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id") +EXT_TIMELINE["main"]=$(docker compose exec 
neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") +create_timeline "${EXT_TIMELINE["main"]}" init +restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE["init"]}" create_extensions "${EXTNAMES}" if [ "${FORCE_ALL_UPGRADE_TESTS:-false}" = true ]; then exts="${EXTNAMES}" @@ -71,29 +108,13 @@ fi if [ -z "${exts}" ]; then echo "No extensions were upgraded" else - tenant_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.tenant_id") - timeline_id=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") for ext in ${exts}; do echo Testing ${ext}... + create_timeline "${EXT_TIMELINE["main"]}" ${ext} EXTDIR=$(echo ${EXTENSIONS} | jq -r '.[] | select(.extname=="'${ext}'") | .extdir') - generate_id new_timeline_id - PARAMS=( - -sbf - -X POST - -H "Content-Type: application/json" - -d "{\"new_timeline_id\": \"${new_timeline_id}\", \"pg_version\": ${PG_VERSION}, \"ancestor_timeline_id\": \"${timeline_id}\"}" - "http://127.0.0.1:9898/v1/tenant/${tenant_id}/timeline/" - ) - result=$(curl "${PARAMS[@]}") - echo $result | jq . - TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose down compute compute_is_ready - COMPUTE_TAG=${NEW_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${new_timeline_id} docker compose up --quiet-pull -d --build compute compute_is_ready - wait_for_ready - TID=$(docker compose exec neon-test-extensions psql -Aqt -c "SHOW neon.timeline_id") - if [ ${TID} != ${new_timeline_id} ]; then - echo Timeline mismatch - exit 1 - fi + restart_compute "${OLD_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}" + docker compose exec neon-test-extensions psql -d contrib_regression -c "CREATE EXTENSION ${ext} CASCADE" + restart_compute "${NEW_COMPUTE_TAG}" "${EXT_TIMELINE[${ext}]}" docker compose exec neon-test-extensions psql -d contrib_regression -c "\dx ${ext}" if ! 
docker compose exec neon-test-extensions sh -c /ext-src/${EXTDIR}/test-upgrade.sh; then docker compose exec neon-test-extensions cat /ext-src/${EXTDIR}/regression.diffs From a283edaccf7413c0cb0768c775f00158eacb0af3 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Thu, 27 Feb 2025 15:00:18 +0100 Subject: [PATCH 048/207] PS/Prefetch: Use a timeout for reading data from TCP (#10834) This reduces pressure on OS TCP buffers, reducing flush times in other systems like PageServer. ## Problem ## Summary of changes --- pgxn/neon/libpagestore.c | 21 +++- pgxn/neon/neon.c | 1 + pgxn/neon/neon.h | 2 + pgxn/neon/pagestore_client.h | 6 +- pgxn/neon/pagestore_smgr.c | 207 +++++++++++++++++++++++++++++++---- 5 files changed, 211 insertions(+), 26 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index f71f11ff93..49f12bbb9e 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -1099,6 +1099,10 @@ pageserver_try_receive(shardno_t shard_no) { neon_shard_log(shard_no, LOG, "pageserver_receive: disconnect due to failure while parsing response"); pageserver_disconnect(shard_no); + /* + * Malformed responses from PageServer are a reason to raise + * errors and cancel transactions. 
+ */ PG_RE_THROW(); } PG_END_TRY(); @@ -1122,7 +1126,8 @@ pageserver_try_receive(shardno_t shard_no) char *msg = pchomp(PQerrorMessage(pageserver_conn)); pageserver_disconnect(shard_no); - neon_shard_log(shard_no, ERROR, "pageserver_receive disconnect: could not read COPY data: %s", msg); + neon_shard_log(shard_no, LOG, "pageserver_receive disconnect: could not read COPY data: %s", msg); + resp = NULL; } else { @@ -1321,6 +1326,16 @@ pg_init_libpagestore(void) PGC_USERSET, 0, /* no flags required */ NULL, (GucIntAssignHook) &readahead_buffer_resize, NULL); + DefineCustomIntVariable("neon.readahead_getpage_pull_timeout", + "readahead response pull timeout", + "Time between active tries to pull data from the " + "PageStream connection when we have pages which " + "were read ahead but not yet received.", + &readahead_getpage_pull_timeout_ms, + 0, 0, 5 * 60 * 1000, + PGC_USERSET, + GUC_UNIT_MS, + NULL, NULL, NULL); DefineCustomIntVariable("neon.protocol_version", "Version of compute<->page server protocol", NULL, @@ -1334,7 +1349,7 @@ pg_init_libpagestore(void) DefineCustomIntVariable("neon.pageserver_response_log_timeout", "pageserver response log timeout", - "If the pageserver doesn't respond to a request within this timeout," + "If the pageserver doesn't respond to a request within this timeout, " "a message is printed to the log.", &pageserver_response_log_timeout, 10000, 100, INT_MAX, @@ -1344,7 +1359,7 @@ pg_init_libpagestore(void) DefineCustomIntVariable("neon.pageserver_response_disconnect_timeout", "pageserver response diconnect timeout", - "If the pageserver doesn't respond to a request within this timeout," + "If the pageserver doesn't respond to a request within this timeout, " "disconnect and reconnect.", &pageserver_response_disconnect_timeout, 120000, 100, INT_MAX, diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 768d7ae9e8..4b448ba5f6 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -437,6 +437,7 @@ _PG_init(void) pg_init_libpagestore(); 
pg_init_walproposer(); + pagestore_smgr_init(); Custom_XLogReaderRoutines = NeonOnDemandXLogReaderRoutines; InitUnstableExtensionsSupport(); diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 912e09c3d3..7686ce076b 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -22,6 +22,7 @@ extern char *neon_tenant; extern char *wal_acceptors_list; extern int wal_acceptor_reconnect_timeout; extern int wal_acceptor_connection_timeout; +extern int readahead_getpage_pull_timeout_ms; #if PG_MAJORVERSION_NUM >= 17 extern uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; @@ -49,6 +50,7 @@ extern uint32 WAIT_EVENT_NEON_WAL_DL; extern void pg_init_libpagestore(void); extern void pg_init_walproposer(void); +extern void pagestore_smgr_init(void); extern uint64 BackpressureThrottlingTime(void); extern void SetNeonCurrentClusterSize(uint64 size); diff --git a/pgxn/neon/pagestore_client.h b/pgxn/neon/pagestore_client.h index 9faab1e4f0..475697f9c0 100644 --- a/pgxn/neon/pagestore_client.h +++ b/pgxn/neon/pagestore_client.h @@ -209,7 +209,11 @@ typedef struct NeonResponse *(*receive) (shardno_t shard_no); /* * Try get the next response from the TCP buffers, if any. - * Returns NULL when the data is not yet available. + * Returns NULL when the data is not yet available. + * + * This will raise errors only for malformed responses (we can't put them + * back into connection). All other error conditions are soft errors and + * return NULL as "no response available". 
*/ NeonResponse *(*try_receive) (shardno_t shard_no); /* diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 091ad555e0..fe463fd4a6 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -65,10 +65,12 @@ #include "storage/fsm_internals.h" #include "storage/md.h" #include "storage/smgr.h" +#include "utils/timeout.h" +#include "bitmap.h" +#include "neon.h" #include "neon_perf_counters.h" #include "pagestore_client.h" -#include "bitmap.h" #if PG_VERSION_NUM >= 150000 #include "access/xlogrecovery.h" @@ -123,6 +125,45 @@ static BlockNumber neon_nblocks(SMgrRelation reln, ForkNumber forknum); static uint32 local_request_counter; #define GENERATE_REQUEST_ID() (((NeonRequestId)MyProcPid << 32) | ++local_request_counter) +/* + * Various settings related to prompt (fast) handling of PageStream responses + * at any CHECK_FOR_INTERRUPTS point. + */ +int readahead_getpage_pull_timeout_ms = 0; +static int PS_TIMEOUT_ID = 0; +static bool timeout_set = false; +static bool timeout_signaled = false; + +/* + * We have a CHECK_FOR_INTERRUPTS in page_server->receive(), and we don't want + * that to handle any getpage responses if we're already working on the + * backlog of those, as we'd hit issues with determining which prefetch slot + * we just got a response for. + * + * To protect against that, we have this variable that's set whenever we start + * receiving data for prefetch slots, so that we don't get confused. + * + * Note that in certain error cases during readpage we may leak r_r_g=true, + * which results in a failure to pick up further responses until we first + * actively try to receive new getpage responses. 
+ */ +static bool readpage_reentrant_guard = false; + +static void reconfigure_timeout_if_needed(void); +static void pagestore_timeout_handler(void); + +#define START_PREFETCH_RECEIVE_WORK() \ + do { \ + readpage_reentrant_guard = true; \ + } while (false) + +#define END_PREFETCH_RECEIVE_WORK() \ + do { \ + readpage_reentrant_guard = false; \ + if (unlikely(timeout_signaled && !InterruptPending)) \ + InterruptPending = true; \ + } while (false) + /* * Prefetch implementation: * @@ -221,7 +262,6 @@ typedef struct PrfHashEntry #define SH_DEFINE #define SH_DECLARE #include "lib/simplehash.h" -#include "neon.h" /* * PrefetchState maintains the state of (prefetch) getPage@LSN requests. @@ -407,17 +447,26 @@ compact_prefetch_buffers(void) } /* - * If there might be responses still in the TCP buffer, then - * we should try to use those, so as to reduce any TCP backpressure - * on the OS/PS side. + * If there might be responses still in the TCP buffer, then we should try to + * use those, to reduce any TCP backpressure on the OS/PS side. * * This procedure handles that. * - * Note that this is only valid as long as the only pipelined - * operations in the TCP buffer are getPage@Lsn requests. + * Note that this works because we don't pipeline non-getPage requests. + * + * NOTE: This procedure is not allowed to throw errors that should be handled + * by SMGR-related code, as this can be called from every CHECK_FOR_INTERRUPTS + * point inside and outside PostgreSQL. + * + * This still does throw errors when it receives malformed responses from PS. + * + * When we're not called from CHECK_FOR_INTERRUPTS (indicated by + * IsHandlingInterrupts) we also report we've ended prefetch receive work, + * just in case state tracking was lost due to an error in the sync getPage + * response code. 
*/ static void -prefetch_pump_state(void) +prefetch_pump_state(bool IsHandlingInterrupts) { while (MyPState->ring_receive != MyPState->ring_flush) { @@ -466,6 +515,12 @@ prefetch_pump_state(void) } } } + + /* We never pump the prefetch state while handling other pages */ + if (!IsHandlingInterrupts) + END_PREFETCH_RECEIVE_WORK(); + + reconfigure_timeout_if_needed(); } void @@ -581,8 +636,8 @@ readahead_buffer_resize(int newsize, void *extra) /* * Make sure that there are no responses still in the buffer. * - * NOTE: this function may indirectly update MyPState->pfs_hash; which - * invalidates any active pointers into the hash table. + * This function may indirectly update MyPState->pfs_hash; which invalidates + * any active pointers into the hash table. */ static void consume_prefetch_responses(void) @@ -639,6 +694,7 @@ static bool prefetch_wait_for(uint64 ring_index) { PrefetchRequest *entry; + bool result = true; if (MyPState->ring_flush <= ring_index && MyPState->ring_unused > MyPState->ring_flush) @@ -652,13 +708,21 @@ prefetch_wait_for(uint64 ring_index) while (MyPState->ring_receive <= ring_index) { + START_PREFETCH_RECEIVE_WORK(); entry = GetPrfSlot(MyPState->ring_receive); Assert(entry->status == PRFS_REQUESTED); if (!prefetch_read(entry)) - return false; + { + result = false; + break; + } + + END_PREFETCH_RECEIVE_WORK(); + CHECK_FOR_INTERRUPTS(); } - return true; + + return result; } /* @@ -1316,6 +1380,12 @@ page_server_request(void const *req) page_server->disconnect(shard_no); MyNeonCounters->pageserver_open_requests = 0; + /* + * We know for sure we're not working on any prefetch pages after + * this. 
+ */ + END_PREFETCH_RECEIVE_WORK(); + PG_RE_THROW(); } PG_END_TRY(); @@ -2943,7 +3013,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, MyPState->ring_last <= ring_index); } - prefetch_pump_state(); + prefetch_pump_state(false); return false; } @@ -2986,7 +3056,7 @@ neon_prefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) Assert(ring_index < MyPState->ring_unused && MyPState->ring_last <= ring_index); - prefetch_pump_state(); + prefetch_pump_state(false); return false; } @@ -3030,7 +3100,7 @@ neon_writeback(SMgrRelation reln, ForkNumber forknum, */ neon_log(SmgrTrace, "writeback noop"); - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -3278,7 +3348,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer } /* Try to read PS results if they are available */ - prefetch_pump_state(); + prefetch_pump_state(false); neon_get_request_lsns(InfoFromSMgrRel(reln), forkNum, blkno, &request_lsns, 1); @@ -3300,7 +3370,7 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer /* * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes. */ - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) @@ -3411,7 +3481,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, nblocks, PG_IOV_MAX); /* Try to read PS results if they are available */ - prefetch_pump_state(); + prefetch_pump_state(false); neon_get_request_lsns(InfoFromSMgrRel(reln), forknum, blocknum, request_lsns, nblocks); @@ -3456,7 +3526,7 @@ neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, /* * Try to receive prefetch results once again just to make sure we don't leave the smgr code while the OS might still have buffered bytes. 
*/ - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) @@ -3626,7 +3696,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo lfc_write(InfoFromSMgrRel(reln), forknum, blocknum, buffer); - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -3681,7 +3751,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, lfc_writev(InfoFromSMgrRel(reln), forknum, blkno, buffers, nblocks); - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -3972,7 +4042,7 @@ neon_immedsync(SMgrRelation reln, ForkNumber forknum) neon_log(SmgrTrace, "[NEON_SMGR] immedsync noop"); - prefetch_pump_state(); + prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) @@ -4273,6 +4343,7 @@ neon_read_slru_segment(SMgrRelation reln, const char* path, int segno, void* buf } pfree(resp); + reconfigure_timeout_if_needed(); return n_blocks; } @@ -4308,6 +4379,7 @@ AtEOXact_neon(XactEvent event, void *arg) } break; } + reconfigure_timeout_if_needed(); } static const struct f_smgr neon_smgr = @@ -4564,3 +4636,94 @@ neon_redo_read_buffer_filter(XLogReaderState *record, uint8 block_id) } return no_redo_needed; } + +static void +reconfigure_timeout_if_needed(void) +{ + bool needs_set = MyPState->ring_receive != MyPState->ring_unused && + readahead_getpage_pull_timeout_ms > 0; + + if (needs_set != timeout_set) + { + /* The background writer doens't (shouldn't) read any pages */ + Assert(!AmBackgroundWriterProcess()); + /* The checkpointer doens't (shouldn't) read any pages */ + Assert(!AmCheckpointerProcess()); + + if (unlikely(PS_TIMEOUT_ID == 0)) + { + PS_TIMEOUT_ID = RegisterTimeout(USER_TIMEOUT, pagestore_timeout_handler); + } + + if (needs_set) + { +#if PG_MAJORVERSION_NUM <= 14 + enable_timeout_after(PS_TIMEOUT_ID, 
readahead_getpage_pull_timeout_ms); +#else + enable_timeout_every( + PS_TIMEOUT_ID, + TimestampTzPlusMilliseconds(GetCurrentTimestamp(), + readahead_getpage_pull_timeout_ms), + readahead_getpage_pull_timeout_ms + ); +#endif + timeout_set = true; + } + else + { + Assert(timeout_set); + disable_timeout(PS_TIMEOUT_ID, false); + timeout_set = false; + } + } +} + +static void +pagestore_timeout_handler(void) +{ +#if PG_MAJORVERSION_NUM <= 14 + /* + * PG14: Setting a repeating timeout is not possible, so we signal here + * that the timeout has already been reset, and by telling the system + * that system will re-schedule it later if we need to. + */ + timeout_set = false; +#endif + timeout_signaled = true; + InterruptPending = true; +} + +static process_interrupts_callback_t prev_interrupt_cb; + +/* + * Process new data received in our active PageStream sockets. + * + * This relies on the invariant that all pipelined yet-to-be-received requests + * are getPage requests managed by MyPState. This is currently true, any + * modification will probably require some stuff to make it work again. 
+ */ +static bool +pagestore_smgr_processinterrupts(void) +{ + if (timeout_signaled) + { + if (!readpage_reentrant_guard && readahead_getpage_pull_timeout_ms > 0) + prefetch_pump_state(true); + + timeout_signaled = false; + reconfigure_timeout_if_needed(); + } + + if (!prev_interrupt_cb) + return false; + + return prev_interrupt_cb(); +} + + +void +pagestore_smgr_init(void) +{ + prev_interrupt_cb = ProcessInterruptsCallback; + ProcessInterruptsCallback = pagestore_smgr_processinterrupts; +} From e58f264a05c12f843fb3eff0c1bbd441f390aaa6 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 27 Feb 2025 16:31:05 +0200 Subject: [PATCH 049/207] Increase inmem SMGR size for walredo process to 100 pages (#10937) ## Problem We see `Inmem storage overflow` in page server logs: https://neondb.slack.com/archives/C033RQ5SPDH/p1740157873114339 The walredo process is using the inmem SMGR with storage size limited to 64 pages with a warning watermark of 32 (based on the assumption that XLR_MAX_BLOCK_ID is 32, so a WAL record cannot access more than 32 pages). Actually this is not true: we can update up to 3 forks for each block (including updates of the FSM and VM forks). ## Summary of changes This PR increases the inmem SMGR size for the walredo process to 100 pages and prints a stack trace in case of overflow. 
--------- Co-authored-by: Konstantin Knizhnik --- pageserver/src/walredo/process.rs | 4 +++- pgxn/neon_walredo/inmem_smgr.c | 16 ++++++++-------- pgxn/neon_walredo/walredoproc.c | 5 +++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pageserver/src/walredo/process.rs b/pageserver/src/walredo/process.rs index 5a9fc63e63..6d4a38d4ff 100644 --- a/pageserver/src/walredo/process.rs +++ b/pageserver/src/walredo/process.rs @@ -136,7 +136,9 @@ impl WalRedoProcess { Ok(0) => break Ok(()), // eof Ok(num_bytes) => { let output = String::from_utf8_lossy(&buf[..num_bytes]); - error!(%output, "received output"); + if !output.contains("LOG:") { + error!(%output, "received output"); + } } Err(e) => { break Err(e); diff --git a/pgxn/neon_walredo/inmem_smgr.c b/pgxn/neon_walredo/inmem_smgr.c index 74cd5ac601..ff2846a9e7 100644 --- a/pgxn/neon_walredo/inmem_smgr.c +++ b/pgxn/neon_walredo/inmem_smgr.c @@ -32,8 +32,8 @@ #include "inmem_smgr.h" -/* Size of the in-memory smgr */ -#define MAX_PAGES 64 +/* Size of the in-memory smgr: XLR_MAX_BLOCK_ID is 32, but we can update up to 3 forks for each block */ +#define MAX_PAGES 100 /* If more than WARN_PAGES are used, print a warning in the log */ #define WARN_PAGES 32 @@ -285,12 +285,12 @@ inmem_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, * WARN_PAGES, print a warning so that we get alerted and get to * investigate why we're accessing so many buffers. */ - elog(used_pages >= WARN_PAGES ? 
WARNING : DEBUG1, - "inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u", - RelFileInfoFmt(InfoFromSMgrRel(reln)), - forknum, - blocknum, - used_pages); + if (used_pages >= WARN_PAGES) + ereport(WARNING, (errmsg("inmem_write() called for %u/%u/%u.%u blk %u: used_pages %u", + RelFileInfoFmt(InfoFromSMgrRel(reln)), + forknum, + blocknum, + used_pages), errbacktrace())); if (used_pages == MAX_PAGES) elog(ERROR, "Inmem storage overflow"); diff --git a/pgxn/neon_walredo/walredoproc.c b/pgxn/neon_walredo/walredoproc.c index 619b7255ae..4673de778c 100644 --- a/pgxn/neon_walredo/walredoproc.c +++ b/pgxn/neon_walredo/walredoproc.c @@ -142,7 +142,7 @@ static BufferTag target_redo_tag; static XLogReaderState *reader_state; -#define TRACE LOG +#define TRACE DEBUG1 #ifdef HAVE_LIBSECCOMP @@ -194,6 +194,7 @@ static PgSeccompRule allowed_syscalls[] = * is stored in MyProcPid anyway. */ PG_SCMP_ALLOW(getpid), + PG_SCMP_ALLOW(futex), /* needed for errbacktrace */ /* Enable those for a proper shutdown. */ #if 0 @@ -253,7 +254,7 @@ WalRedoMain(int argc, char *argv[]) * which is super strange but that's not something we can solve * for here. ¯\_(-_-)_/¯ */ - SetConfigOption("log_min_messages", "FATAL", PGC_SUSET, PGC_S_OVERRIDE); + SetConfigOption("log_min_messages", "WARNING", PGC_SUSET, PGC_S_OVERRIDE); SetConfigOption("client_min_messages", "ERROR", PGC_SUSET, PGC_S_OVERRIDE); From 7ed236e17e04763e6547430e29c9bd152dc12827 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 27 Feb 2025 17:16:14 +0100 Subject: [PATCH 050/207] fix(ci): push prod container images again (#11020) ## Problem https://github.com/neondatabase/neon/pull/10841 made building compute and neon images optional on releases that don't need them. The `push--image-prod` jobs had transitive dependencies that were skipped due to that, causing the images not to be pushed to production registries. 
## Summary of changes Add `!failure() && !cancelled() &&` to the beginning of the conditions for these jobs to ensure they run even if some of their transitive dependencies are skipped. --- .github/workflows/build_and_test.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 0dcf11d32f..639c258c5c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -906,7 +906,8 @@ jobs: push-neon-image-prod: needs: [ meta, generate-image-maps, neon-image, test-images ] - if: ${{ contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }} + # Depends on jobs that can get skipped + if: ${{ !failure() && !cancelled() && contains(fromJSON('["storage-release", "proxy-release"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -924,7 +925,8 @@ jobs: push-compute-image-prod: needs: [ meta, generate-image-maps, vm-compute-node-image, test-images ] - if: ${{ needs.meta.outputs.run-kind == 'compute-release' }} + # Depends on jobs that can get skipped + if: ${{ !failure() && !cancelled() && needs.meta.outputs.run-kind == 'compute-release' }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login From ab1f22b7d175ad504911ce41de66c7c51802c736 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 27 Feb 2025 10:26:55 -0600 Subject: [PATCH 051/207] fix(pageserver): correctly access layer map in gc-compaction (#11021) ## Problem layer_map access was unwrapped. It might return an error during shutdown. ## Summary of changes Propagate the layer_map access error back to the compaction loop. 
Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 3f2f1a6e5f..c835980a7d 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -213,30 +213,33 @@ impl GcCompactionQueue { } /// Trigger an auto compaction. - pub async fn trigger_auto_compaction(&self, timeline: &Arc) { + pub async fn trigger_auto_compaction( + &self, + timeline: &Arc, + ) -> Result<(), CompactionError> { let GcCompactionCombinedSettings { gc_compaction_enabled, gc_compaction_initial_threshold_kb, gc_compaction_ratio_percent, } = timeline.get_gc_compaction_settings(); if !gc_compaction_enabled { - return; + return Ok(()); } if self.remaining_jobs_num() > 0 { // Only schedule auto compaction when the queue is empty - return; + return Ok(()); } if timeline.ancestor_timeline().is_some() { // Do not trigger auto compaction for child timelines. We haven't tested // it enough in staging yet. - return; + return Ok(()); } let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else { // Only allow one compaction run at a time. TODO: As we do `try_acquire_owned`, we cannot ensure // the fairness of the lock across timelines. We should listen for both `acquire` and `l0_compaction_trigger` // to ensure the fairness while avoid starving other tasks. 
- return; + return Ok(()); }; let gc_compaction_state = timeline.get_gc_compaction_state(); @@ -246,7 +249,7 @@ impl GcCompactionQueue { let layers = { let guard = timeline.layers.read().await; - let layer_map = guard.layer_map().unwrap(); + let layer_map = guard.layer_map()?; layer_map.iter_historic_layers().collect_vec() }; let mut l2_size: u64 = 0; @@ -323,6 +326,7 @@ impl GcCompactionQueue { l1_size, l2_size, l2_lsn, gc_cutoff ); } + Ok(()) } /// Notify the caller the job has finished and unblock GC. @@ -444,7 +448,7 @@ impl GcCompactionQueue { None } }) else { - self.trigger_auto_compaction(timeline).await; + self.trigger_auto_compaction(timeline).await?; // Always yield after triggering auto-compaction. Gc-compaction is a low-priority task and we // have not implemented preemption mechanism yet. We always want to yield it to more important // tasks if there is one. From a4b2009800c1d5bae70739f2a7e04ca867a83bcc Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Thu, 27 Feb 2025 22:13:06 +0200 Subject: [PATCH 052/207] compute_ctl: Refactor, moving spec_apply functions to spec_apply.rs (#11006) Seems nice to have the function and all its subroutines in the same source file. 
--- compute_tools/src/compute.rs | 399 +------------------------------ compute_tools/src/spec_apply.rs | 406 +++++++++++++++++++++++++++++++- 2 files changed, 404 insertions(+), 401 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 97fa45062b..c0e28790d6 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -1,5 +1,4 @@ -use std::collections::{HashMap, HashSet}; -use std::iter::once; +use std::collections::HashMap; use std::os::unix::fs::{PermissionsExt, symlink}; use std::path::Path; use std::process::{Command, Stdio}; @@ -13,9 +12,7 @@ use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use compute_api::privilege::Privilege; use compute_api::responses::{ComputeMetrics, ComputeStatus}; -use compute_api::spec::{ - ComputeFeature, ComputeMode, ComputeSpec, Database, ExtVersion, PgIdent, Role, -}; +use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent}; use futures::StreamExt; use futures::future::join_all; use futures::stream::FuturesUnordered; @@ -34,16 +31,6 @@ use utils::measured_stream::MeasuredReader; use crate::installed_extensions::get_installed_extensions; use crate::pg_helpers::*; use crate::spec::*; -use crate::spec_apply::ApplySpecPhase::{ - CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon, - CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, - HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, - RunInEachDatabase, -}; -use crate::spec_apply::PerDatabasePhase::{ - ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, -}; -use crate::spec_apply::{DB, MutableApplyContext, PerDatabasePhase, apply_operations}; use crate::sync_sk::{check_if_synced, ping_safekeeper}; use crate::{config, extension_server, local_proxy}; @@ -928,388 +915,6 @@ impl ComputeNode { Ok(client) } - /// Apply the spec to the running PostgreSQL 
instance. - /// The caller can decide to run with multiple clients in parallel, or - /// single mode. Either way, the commands executed will be the same, and - /// only commands run in different databases are parallelized. - #[instrument(skip_all)] - pub fn apply_spec_sql( - &self, - spec: Arc, - conf: Arc, - concurrency: usize, - ) -> Result<()> { - info!("Applying config with max {} concurrency", concurrency); - debug!("Config: {:?}", spec); - - let rt = tokio::runtime::Handle::current(); - rt.block_on(async { - // Proceed with post-startup configuration. Note, that order of operations is important. - let client = Self::get_maintenance_client(&conf).await?; - let spec = spec.clone(); - - let databases = get_existing_dbs_async(&client).await?; - let roles = get_existing_roles_async(&client) - .await? - .into_iter() - .map(|role| (role.name.clone(), role)) - .collect::>(); - - // Check if we need to drop subscriptions before starting the endpoint. - // - // It is important to do this operation exactly once when endpoint starts on a new branch. - // Otherwise, we may drop not inherited, but newly created subscriptions. - // - // We cannot rely only on spec.drop_subscriptions_before_start flag, - // because if for some reason compute restarts inside VM, - // it will start again with the same spec and flag value. - // - // To handle this, we save the fact of the operation in the database - // in the neon.drop_subscriptions_done table. - // If the table does not exist, we assume that the operation was never performed, so we must do it. - // If table exists, we check if the operation was performed on the current timelilne. 
- // - let mut drop_subscriptions_done = false; - - if spec.drop_subscriptions_before_start { - let timeline_id = self.get_timeline_id().context("timeline_id must be set")?; - let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id); - - info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id); - - drop_subscriptions_done = match - client.simple_query(&query).await { - Ok(result) => { - matches!(&result[0], postgres::SimpleQueryMessage::Row(_)) - }, - Err(e) => - { - match e.code() { - Some(&SqlState::UNDEFINED_TABLE) => false, - _ => { - // We don't expect any other error here, except for the schema/table not existing - error!("Error checking if drop subscription operation was already performed: {}", e); - return Err(e.into()); - } - } - } - } - }; - - - let jwks_roles = Arc::new( - spec.as_ref() - .local_proxy_config - .iter() - .flat_map(|it| &it.jwks) - .flatten() - .flat_map(|setting| &setting.role_names) - .cloned() - .collect::>(), - ); - - let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext { - roles, - dbs: databases, - })); - - // Apply special pre drop database phase. - // NOTE: we use the code of RunInEachDatabase phase for parallelism - // and connection management, but we don't really run it in *each* database, - // only in databases, we're about to drop. - info!("Applying PerDatabase (pre-dropdb) phase"); - let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); - - // Run the phase for each database that we're about to drop. 
- let db_processes = spec - .delta_operations - .iter() - .flatten() - .filter_map(move |op| { - if op.action.as_str() == "delete_db" { - Some(op.name.clone()) - } else { - None - } - }) - .map(|dbname| { - let spec = spec.clone(); - let ctx = ctx.clone(); - let jwks_roles = jwks_roles.clone(); - let mut conf = conf.as_ref().clone(); - let concurrency_token = concurrency_token.clone(); - // We only need dbname field for this phase, so set other fields to dummy values - let db = DB::UserDB(Database { - name: dbname.clone(), - owner: "cloud_admin".to_string(), - options: None, - restrict_conn: false, - invalid: false, - }); - - debug!("Applying per-database phases for Database {:?}", &db); - - match &db { - DB::SystemDB => {} - DB::UserDB(db) => { - conf.dbname(db.name.as_str()); - } - } - - let conf = Arc::new(conf); - let fut = Self::apply_spec_sql_db( - spec.clone(), - conf, - ctx.clone(), - jwks_roles.clone(), - concurrency_token.clone(), - db, - [DropLogicalSubscriptions].to_vec(), - ); - - Ok(spawn(fut)) - }) - .collect::>>(); - - for process in db_processes.into_iter() { - let handle = process?; - if let Err(e) = handle.await? { - // Handle the error case where the database does not exist - // We do not check whether the DB exists or not in the deletion phase, - // so we shouldn't be strict about it in pre-deletion cleanup as well. 
- if e.to_string().contains("does not exist") { - warn!("Error dropping subscription: {}", e); - } else { - return Err(e); - } - }; - } - - for phase in [ - CreateSuperUser, - DropInvalidDatabases, - RenameRoles, - CreateAndAlterRoles, - RenameAndDeleteDatabases, - CreateAndAlterDatabases, - CreateSchemaNeon, - ] { - info!("Applying phase {:?}", &phase); - apply_operations( - spec.clone(), - ctx.clone(), - jwks_roles.clone(), - phase, - || async { Ok(&client) }, - ) - .await?; - } - - info!("Applying RunInEachDatabase2 phase"); - let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); - - let db_processes = spec - .cluster - .databases - .iter() - .map(|db| DB::new(db.clone())) - // include - .chain(once(DB::SystemDB)) - .map(|db| { - let spec = spec.clone(); - let ctx = ctx.clone(); - let jwks_roles = jwks_roles.clone(); - let mut conf = conf.as_ref().clone(); - let concurrency_token = concurrency_token.clone(); - let db = db.clone(); - - debug!("Applying per-database phases for Database {:?}", &db); - - match &db { - DB::SystemDB => {} - DB::UserDB(db) => { - conf.dbname(db.name.as_str()); - } - } - - let conf = Arc::new(conf); - let mut phases = vec![ - DeleteDBRoleReferences, - ChangeSchemaPerms, - HandleAnonExtension, - ]; - - if spec.drop_subscriptions_before_start && !drop_subscriptions_done { - info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set"); - phases.push(DropLogicalSubscriptions); - } - - let fut = Self::apply_spec_sql_db( - spec.clone(), - conf, - ctx.clone(), - jwks_roles.clone(), - concurrency_token.clone(), - db, - phases, - ); - - Ok(spawn(fut)) - }) - .collect::>>(); - - for process in db_processes.into_iter() { - let handle = process?; - handle.await??; - } - - let mut phases = vec![ - HandleOtherExtensions, - HandleNeonExtension, // This step depends on CreateSchemaNeon - CreateAvailabilityCheck, - DropRoles, - ]; - - // This step depends on CreateSchemaNeon - if 
spec.drop_subscriptions_before_start && !drop_subscriptions_done { - info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set"); - phases.push(FinalizeDropLogicalSubscriptions); - } - - for phase in phases { - debug!("Applying phase {:?}", &phase); - apply_operations( - spec.clone(), - ctx.clone(), - jwks_roles.clone(), - phase, - || async { Ok(&client) }, - ) - .await?; - } - - Ok::<(), anyhow::Error>(()) - })?; - - Ok(()) - } - - /// Apply SQL migrations of the RunInEachDatabase phase. - /// - /// May opt to not connect to databases that don't have any scheduled - /// operations. The function is concurrency-controlled with the provided - /// semaphore. The caller has to make sure the semaphore isn't exhausted. - async fn apply_spec_sql_db( - spec: Arc, - conf: Arc, - ctx: Arc>, - jwks_roles: Arc>, - concurrency_token: Arc, - db: DB, - subphases: Vec, - ) -> Result<()> { - let _permit = concurrency_token.acquire().await?; - - let mut client_conn = None; - - for subphase in subphases { - apply_operations( - spec.clone(), - ctx.clone(), - jwks_roles.clone(), - RunInEachDatabase { - db: db.clone(), - subphase, - }, - // Only connect if apply_operation actually wants a connection. - // It's quite possible this database doesn't need any queries, - // so by not connecting we save time and effort connecting to - // that database. - || async { - if client_conn.is_none() { - let db_client = Self::get_maintenance_client(&conf).await?; - client_conn.replace(db_client); - } - let client = client_conn.as_ref().unwrap(); - Ok(client) - }, - ) - .await?; - } - - drop(client_conn); - - Ok::<(), anyhow::Error>(()) - } - - /// Choose how many concurrent connections to use for applying the spec changes. 
- pub fn max_service_connections( - &self, - compute_state: &ComputeState, - spec: &ComputeSpec, - ) -> usize { - // If the cluster is in Init state we don't have to deal with user connections, - // and can thus use all `max_connections` connection slots. However, that's generally not - // very efficient, so we generally still limit it to a smaller number. - if compute_state.status == ComputeStatus::Init { - // If the settings contain 'max_connections', use that as template - if let Some(config) = spec.cluster.settings.find("max_connections") { - config.parse::().ok() - } else { - // Otherwise, try to find the setting in the postgresql_conf string - spec.cluster - .postgresql_conf - .iter() - .flat_map(|conf| conf.split("\n")) - .filter_map(|line| { - if !line.contains("max_connections") { - return None; - } - - let (key, value) = line.split_once("=")?; - let key = key - .trim_start_matches(char::is_whitespace) - .trim_end_matches(char::is_whitespace); - - let value = value - .trim_start_matches(char::is_whitespace) - .trim_end_matches(char::is_whitespace); - - if key != "max_connections" { - return None; - } - - value.parse::().ok() - }) - .next() - } - // If max_connections is present, use at most 1/3rd of that. - // When max_connections is lower than 30, try to use at least 10 connections, but - // never more than max_connections. - .map(|limit| match limit { - 0..10 => limit, - 10..30 => 10, - 30.. => limit / 3, - }) - // If we didn't find max_connections, default to 10 concurrent connections. - .unwrap_or(10) - } else { - // state == Running - // Because the cluster is already in the Running state, we should assume users are - // already connected to the cluster, and high concurrency could negatively - // impact user connectivity. Therefore, we can limit concurrency to the number of - // reserved superuser connections, which users wouldn't be able to use anyway. 
- spec.cluster - .settings - .find("superuser_reserved_connections") - .iter() - .filter_map(|val| val.parse::().ok()) - .map(|val| if val > 1 { val - 1 } else { 1 }) - .last() - .unwrap_or(3) - } - } - /// Do initial configuration of the already started Postgres. #[instrument(skip_all)] pub fn apply_config(&self, compute_state: &ComputeState) -> Result<()> { diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index b4e084fd91..f9a37c5c98 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -4,15 +4,413 @@ use std::future::Future; use std::iter::{empty, once}; use std::sync::Arc; -use anyhow::Result; +use anyhow::{Context, Result}; +use compute_api::responses::ComputeStatus; use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role}; use futures::future::join_all; use tokio::sync::RwLock; use tokio_postgres::Client; -use tracing::{Instrument, debug, info_span, warn}; +use tokio_postgres::error::SqlState; +use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; -use crate::compute::construct_superuser_query; -use crate::pg_helpers::{DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal}; +use crate::compute::{ComputeNode, ComputeState, construct_superuser_query}; +use crate::pg_helpers::{ + DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async, + get_existing_roles_async, +}; +use crate::spec_apply::ApplySpecPhase::{ + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon, + CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, + HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, + RunInEachDatabase, +}; +use crate::spec_apply::PerDatabasePhase::{ + ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, +}; + +impl ComputeNode { + /// Apply the spec to the running PostgreSQL instance. 
+ /// The caller can decide to run with multiple clients in parallel, or + /// single mode. Either way, the commands executed will be the same, and + /// only commands run in different databases are parallelized. + #[instrument(skip_all)] + pub fn apply_spec_sql( + &self, + spec: Arc, + conf: Arc, + concurrency: usize, + ) -> Result<()> { + info!("Applying config with max {} concurrency", concurrency); + debug!("Config: {:?}", spec); + + let rt = tokio::runtime::Handle::current(); + rt.block_on(async { + // Proceed with post-startup configuration. Note, that order of operations is important. + let client = Self::get_maintenance_client(&conf).await?; + let spec = spec.clone(); + + let databases = get_existing_dbs_async(&client).await?; + let roles = get_existing_roles_async(&client) + .await? + .into_iter() + .map(|role| (role.name.clone(), role)) + .collect::>(); + + // Check if we need to drop subscriptions before starting the endpoint. + // + // It is important to do this operation exactly once when endpoint starts on a new branch. + // Otherwise, we may drop not inherited, but newly created subscriptions. + // + // We cannot rely only on spec.drop_subscriptions_before_start flag, + // because if for some reason compute restarts inside VM, + // it will start again with the same spec and flag value. + // + // To handle this, we save the fact of the operation in the database + // in the neon.drop_subscriptions_done table. + // If the table does not exist, we assume that the operation was never performed, so we must do it. + // If table exists, we check if the operation was performed on the current timelilne. 
+ // + let mut drop_subscriptions_done = false; + + if spec.drop_subscriptions_before_start { + let timeline_id = self.get_timeline_id().context("timeline_id must be set")?; + let query = format!("select 1 from neon.drop_subscriptions_done where timeline_id = '{}'", timeline_id); + + info!("Checking if drop subscription operation was already performed for timeline_id: {}", timeline_id); + + drop_subscriptions_done = match + client.simple_query(&query).await { + Ok(result) => { + matches!(&result[0], postgres::SimpleQueryMessage::Row(_)) + }, + Err(e) => + { + match e.code() { + Some(&SqlState::UNDEFINED_TABLE) => false, + _ => { + // We don't expect any other error here, except for the schema/table not existing + error!("Error checking if drop subscription operation was already performed: {}", e); + return Err(e.into()); + } + } + } + } + }; + + + let jwks_roles = Arc::new( + spec.as_ref() + .local_proxy_config + .iter() + .flat_map(|it| &it.jwks) + .flatten() + .flat_map(|setting| &setting.role_names) + .cloned() + .collect::>(), + ); + + let ctx = Arc::new(tokio::sync::RwLock::new(MutableApplyContext { + roles, + dbs: databases, + })); + + // Apply special pre drop database phase. + // NOTE: we use the code of RunInEachDatabase phase for parallelism + // and connection management, but we don't really run it in *each* database, + // only in databases, we're about to drop. + info!("Applying PerDatabase (pre-dropdb) phase"); + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + // Run the phase for each database that we're about to drop. 
+ let db_processes = spec + .delta_operations + .iter() + .flatten() + .filter_map(move |op| { + if op.action.as_str() == "delete_db" { + Some(op.name.clone()) + } else { + None + } + }) + .map(|dbname| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut conf = conf.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + // We only need dbname field for this phase, so set other fields to dummy values + let db = DB::UserDB(Database { + name: dbname.clone(), + owner: "cloud_admin".to_string(), + options: None, + restrict_conn: false, + invalid: false, + }); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + conf.dbname(db.name.as_str()); + } + } + + let conf = Arc::new(conf); + let fut = Self::apply_spec_sql_db( + spec.clone(), + conf, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + [DropLogicalSubscriptions].to_vec(), + ); + + Ok(tokio::spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + if let Err(e) = handle.await? { + // Handle the error case where the database does not exist + // We do not check whether the DB exists or not in the deletion phase, + // so we shouldn't be strict about it in pre-deletion cleanup as well. 
+ if e.to_string().contains("does not exist") { + warn!("Error dropping subscription: {}", e); + } else { + return Err(e); + } + }; + } + + for phase in [ + CreateSuperUser, + DropInvalidDatabases, + RenameRoles, + CreateAndAlterRoles, + RenameAndDeleteDatabases, + CreateAndAlterDatabases, + CreateSchemaNeon, + ] { + info!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + info!("Applying RunInEachDatabase2 phase"); + let concurrency_token = Arc::new(tokio::sync::Semaphore::new(concurrency)); + + let db_processes = spec + .cluster + .databases + .iter() + .map(|db| DB::new(db.clone())) + // include + .chain(once(DB::SystemDB)) + .map(|db| { + let spec = spec.clone(); + let ctx = ctx.clone(); + let jwks_roles = jwks_roles.clone(); + let mut conf = conf.as_ref().clone(); + let concurrency_token = concurrency_token.clone(); + let db = db.clone(); + + debug!("Applying per-database phases for Database {:?}", &db); + + match &db { + DB::SystemDB => {} + DB::UserDB(db) => { + conf.dbname(db.name.as_str()); + } + } + + let conf = Arc::new(conf); + let mut phases = vec![ + DeleteDBRoleReferences, + ChangeSchemaPerms, + HandleAnonExtension, + ]; + + if spec.drop_subscriptions_before_start && !drop_subscriptions_done { + info!("Adding DropLogicalSubscriptions phase because drop_subscriptions_before_start is set"); + phases.push(DropLogicalSubscriptions); + } + + let fut = Self::apply_spec_sql_db( + spec.clone(), + conf, + ctx.clone(), + jwks_roles.clone(), + concurrency_token.clone(), + db, + phases, + ); + + Ok(tokio::spawn(fut)) + }) + .collect::>>(); + + for process in db_processes.into_iter() { + let handle = process?; + handle.await??; + } + + let mut phases = vec![ + HandleOtherExtensions, + HandleNeonExtension, // This step depends on CreateSchemaNeon + CreateAvailabilityCheck, + DropRoles, + ]; + + // This step depends on CreateSchemaNeon + if 
spec.drop_subscriptions_before_start && !drop_subscriptions_done { + info!("Adding FinalizeDropLogicalSubscriptions phase because drop_subscriptions_before_start is set"); + phases.push(FinalizeDropLogicalSubscriptions); + } + + for phase in phases { + debug!("Applying phase {:?}", &phase); + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + phase, + || async { Ok(&client) }, + ) + .await?; + } + + Ok::<(), anyhow::Error>(()) + })?; + + Ok(()) + } + + /// Apply SQL migrations of the RunInEachDatabase phase. + /// + /// May opt to not connect to databases that don't have any scheduled + /// operations. The function is concurrency-controlled with the provided + /// semaphore. The caller has to make sure the semaphore isn't exhausted. + async fn apply_spec_sql_db( + spec: Arc, + conf: Arc, + ctx: Arc>, + jwks_roles: Arc>, + concurrency_token: Arc, + db: DB, + subphases: Vec, + ) -> Result<()> { + let _permit = concurrency_token.acquire().await?; + + let mut client_conn = None; + + for subphase in subphases { + apply_operations( + spec.clone(), + ctx.clone(), + jwks_roles.clone(), + RunInEachDatabase { + db: db.clone(), + subphase, + }, + // Only connect if apply_operation actually wants a connection. + // It's quite possible this database doesn't need any queries, + // so by not connecting we save time and effort connecting to + // that database. + || async { + if client_conn.is_none() { + let db_client = Self::get_maintenance_client(&conf).await?; + client_conn.replace(db_client); + } + let client = client_conn.as_ref().unwrap(); + Ok(client) + }, + ) + .await?; + } + + drop(client_conn); + + Ok::<(), anyhow::Error>(()) + } + + /// Choose how many concurrent connections to use for applying the spec changes. 
+ pub fn max_service_connections( + &self, + compute_state: &ComputeState, + spec: &ComputeSpec, + ) -> usize { + // If the cluster is in Init state we don't have to deal with user connections, + // and can thus use all `max_connections` connection slots. However, that's generally not + // very efficient, so we generally still limit it to a smaller number. + if compute_state.status == ComputeStatus::Init { + // If the settings contain 'max_connections', use that as template + if let Some(config) = spec.cluster.settings.find("max_connections") { + config.parse::().ok() + } else { + // Otherwise, try to find the setting in the postgresql_conf string + spec.cluster + .postgresql_conf + .iter() + .flat_map(|conf| conf.split("\n")) + .filter_map(|line| { + if !line.contains("max_connections") { + return None; + } + + let (key, value) = line.split_once("=")?; + let key = key + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + let value = value + .trim_start_matches(char::is_whitespace) + .trim_end_matches(char::is_whitespace); + + if key != "max_connections" { + return None; + } + + value.parse::().ok() + }) + .next() + } + // If max_connections is present, use at most 1/3rd of that. + // When max_connections is lower than 30, try to use at least 10 connections, but + // never more than max_connections. + .map(|limit| match limit { + 0..10 => limit, + 10..30 => 10, + 30.. => limit / 3, + }) + // If we didn't find max_connections, default to 10 concurrent connections. + .unwrap_or(10) + } else { + // state == Running + // Because the cluster is already in the Running state, we should assume users are + // already connected to the cluster, and high concurrency could negatively + // impact user connectivity. Therefore, we can limit concurrency to the number of + // reserved superuser connections, which users wouldn't be able to use anyway. 
+ spec.cluster + .settings + .find("superuser_reserved_connections") + .iter() + .filter_map(|val| val.parse::().ok()) + .map(|val| if val > 1 { val - 1 } else { 1 }) + .last() + .unwrap_or(3) + } + } +} #[derive(Clone)] pub enum DB { From 55633ebe3aa8d60057000fdfaa82210076369962 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 28 Feb 2025 08:42:08 +0000 Subject: [PATCH 053/207] storcon: enable API passthrough to nonzero shards (#11026) ## Problem Storage controller will proxy GETs to pageserver-like tenant/timeline paths through to the pageserver. Usually GET passthroughs make sense to go to shard 0, e.g. if you want to list timelines. But sometimes you really want to know about a particular shard, e.g. reading its cache state or similar. ## Summary of changes - Accept shard IDs as well as tenant IDs in the passthrough route - Refactor node lookup to take a shard ID and make the tenant ID case a layer on top of that. This is one more lock take-drop during these requests, but it's not particularly expensive and these requests shouldn't be terribly frequent This is not immediately used by anything, but will be there any time we want to e.g. do a pass-through query to check the warmth of a tenant cache on a particular shard or somesuch. 
--- storage_controller/src/http.rs | 23 ++++++++++---- storage_controller/src/service.rs | 50 +++++++++++++++++++++++-------- 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 5b5ae80eaf..de4d45adbe 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -547,7 +547,7 @@ async fn handle_tenant_timeline_passthrough( service: Arc, req: Request, ) -> Result, ApiError> { - let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; + let tenant_or_shard_id: TenantShardId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; let req = match maybe_forward(req).await { @@ -562,15 +562,28 @@ async fn handle_tenant_timeline_passthrough( return Err(ApiError::BadRequest(anyhow::anyhow!("Missing path"))); }; - tracing::info!("Proxying request for tenant {} ({})", tenant_id, path); + tracing::info!( + "Proxying request for tenant {} ({})", + tenant_or_shard_id.tenant_id, + path + ); // Find the node that holds shard zero - let (node, tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?; + let (node, tenant_shard_id) = if tenant_or_shard_id.is_unsharded() { + service + .tenant_shard0_node(tenant_or_shard_id.tenant_id) + .await? + } else { + ( + service.tenant_shard_node(tenant_or_shard_id).await?, + tenant_or_shard_id, + ) + }; // Callers will always pass an unsharded tenant ID. Before proxying, we must // rewrite this to a shard-aware shard zero ID. 
let path = format!("{}", path); - let tenant_str = tenant_id.to_string(); + let tenant_str = tenant_or_shard_id.tenant_id.to_string(); let tenant_shard_str = format!("{}", tenant_shard_id); let path = path.replace(&tenant_str, &tenant_shard_str); @@ -610,7 +623,7 @@ async fn handle_tenant_timeline_passthrough( // Transform 404 into 503 if we raced with a migration if resp.status() == reqwest::StatusCode::NOT_FOUND { // Look up node again: if we migrated it will be different - let (new_node, _tenant_shard_id) = service.tenant_shard0_node(tenant_id).await?; + let new_node = service.tenant_shard_node(tenant_shard_id).await?; if new_node.get_id() != node.get_id() { // Rather than retry here, send the client a 503 to prompt a retry: this matches // the pageserver's use of 503, and all clients calling this API should retry on 503. diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 9a3e042c24..91ce4b83e0 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -4158,16 +4158,14 @@ impl Service { }).await? } - /// When you need to send an HTTP request to the pageserver that holds shard0 of a tenant, this - /// function looks up and returns node. If the tenant isn't found, returns Err(ApiError::NotFound) + /// When you know the TenantId but not a specific shard, and would like to get the node holding shard 0. pub(crate) async fn tenant_shard0_node( &self, tenant_id: TenantId, ) -> Result<(Node, TenantShardId), ApiError> { - // Look up in-memory state and maybe use the node from there. 
- { + let tenant_shard_id = { let locked = self.inner.read().unwrap(); - let Some((tenant_shard_id, shard)) = locked + let Some((tenant_shard_id, _shard)) = locked .tenants .range(TenantShardId::tenant_range(tenant_id)) .next() @@ -4177,6 +4175,29 @@ impl Service { )); }; + *tenant_shard_id + }; + + self.tenant_shard_node(tenant_shard_id) + .await + .map(|node| (node, tenant_shard_id)) + } + + /// When you need to send an HTTP request to the pageserver that holds a shard of a tenant, this + /// function looks up and returns node. If the shard isn't found, returns Err(ApiError::NotFound) + pub(crate) async fn tenant_shard_node( + &self, + tenant_shard_id: TenantShardId, + ) -> Result { + // Look up in-memory state and maybe use the node from there. + { + let locked = self.inner.read().unwrap(); + let Some(shard) = locked.tenants.get(&tenant_shard_id) else { + return Err(ApiError::NotFound( + anyhow::anyhow!("Tenant shard {tenant_shard_id} not found").into(), + )); + }; + let Some(intent_node_id) = shard.intent.get_attached() else { tracing::warn!( tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), @@ -4197,7 +4218,7 @@ impl Service { "Shard refers to nonexistent node" ))); }; - return Ok((node.clone(), *tenant_shard_id)); + return Ok(node.clone()); } }; @@ -4205,29 +4226,34 @@ impl Service { // generation state: this will reflect the progress of any ongoing migration. // Note that it is not guaranteed to _stay_ here, our caller must still handle // the case where they call through to the pageserver and get a 404. 
- let db_result = self.persistence.tenant_generations(tenant_id).await?; + let db_result = self + .persistence + .tenant_generations(tenant_shard_id.tenant_id) + .await?; let Some(ShardGenerationState { - tenant_shard_id, + tenant_shard_id: _, generation: _, generation_pageserver: Some(node_id), - }) = db_result.first() + }) = db_result + .into_iter() + .find(|s| s.tenant_shard_id == tenant_shard_id) else { // This can happen if we raced with a tenant deletion or a shard split. On a retry // the caller will either succeed (shard split case), get a proper 404 (deletion case), // or a conflict response (case where tenant was detached in background) return Err(ApiError::ResourceUnavailable( - "Shard {} not found in database, or is not attached".into(), + format!("Shard {tenant_shard_id} not found in database, or is not attached").into(), )); }; let locked = self.inner.read().unwrap(); - let Some(node) = locked.nodes.get(node_id) else { + let Some(node) = locked.nodes.get(&node_id) else { // This should never happen return Err(ApiError::InternalServerError(anyhow::anyhow!( "Shard refers to nonexistent node" ))); }; - Ok((node.clone(), *tenant_shard_id)) + Ok(node.clone()) } pub(crate) fn tenant_locate( From 0d6d58bd3e98cb459d8ce0b7ee78c5b3a7890f17 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 28 Feb 2025 10:36:53 +0000 Subject: [PATCH 054/207] pageserver: make heatmap layer download API more cplane friendly (#10957) ## Problem We intend for cplane to use the heatmap layer download API to warm up timelines after unarchival. It's tricky for them to recurse in the ancestors, and the current implementation doesn't work well when unarchiving a chain of branches and warming them up. ## Summary of changes * Add a `recurse` flag to the API. When the flag is set, the operation recurses into the parent timeline after the current one is done. * Be resilient to warming up a chain of unarchived branches. 
Let's say we unarchived `B` and `C` from the `A -> B -> C` branch hierarchy. `B` got unarchived first. We generated the unarchival heatmaps and stashed them in `A` and `B`. When `C` was unarchived, it dropped its unarchival heatmap since `A` and `B` already had one. If `C` needed layers from `A` and `B`, it was out of luck. Now, when choosing whether to keep an unarchival heatmap we look at its end LSN. If it's more inclusive than what we currently have, keep it. --- pageserver/client/src/mgmt_api.rs | 4 ++ pageserver/src/http/routes.rs | 5 +- pageserver/src/pgdatadir_mapping.rs | 2 + pageserver/src/tenant.rs | 8 ++- pageserver/src/tenant/timeline.rs | 53 +++++++++++-------- .../timeline/heatmap_layers_downloader.rs | 25 ++++++++- storage_controller/src/http.rs | 3 +- storage_controller/src/pageserver_client.rs | 8 ++- storage_controller/src/service.rs | 8 ++- test_runner/fixtures/neon_fixtures.py | 13 ++++- .../regress/test_pageserver_secondary.py | 42 ++++++--------- 11 files changed, 111 insertions(+), 60 deletions(-) diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index f19b4e964d..37c914c4e9 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -480,6 +480,7 @@ impl Client { tenant_shard_id: TenantShardId, timeline_id: TimelineId, concurrency: Option, + recurse: bool, ) -> Result<()> { let mut path = reqwest::Url::parse(&format!( "{}/v1/tenant/{}/timeline/{}/download_heatmap_layers", @@ -487,6 +488,9 @@ impl Client { )) .expect("Cannot build URL"); + path.query_pairs_mut() + .append_pair("recurse", &format!("{}", recurse)); + if let Some(concurrency) = concurrency { path.query_pairs_mut() .append_pair("concurrency", &format!("{}", concurrency)); diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index b738d22740..a3ee31d6e6 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -1435,6 +1435,7 @@ async fn
timeline_download_heatmap_layers_handler( let desired_concurrency = parse_query_param(&request, "concurrency")?.unwrap_or(DEFAULT_CONCURRENCY); + let recurse = parse_query_param(&request, "recurse")?.unwrap_or(false); check_permission(&request, Some(tenant_shard_id.tenant_id))?; @@ -1451,9 +1452,7 @@ async fn timeline_download_heatmap_layers_handler( .unwrap_or(DEFAULT_MAX_CONCURRENCY); let concurrency = std::cmp::min(max_concurrency, desired_concurrency); - timeline - .start_heatmap_layers_download(concurrency, &ctx) - .await?; + timeline.start_heatmap_layers_download(concurrency, recurse, &ctx)?; json_response(StatusCode::ACCEPTED, ()) } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 787b1b895c..c10dfb4542 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1052,6 +1052,8 @@ impl Timeline { ) -> Result { debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id(); + fail::fail_point!("skip-logical-size-calculation", |_| { Ok(0) }); + // Fetch list of database dirs and iterate them let buf = self.get(DBDIR_KEY, lsn, ctx).await?; let dbdir = DbDirectory::des(&buf)?; diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 9243f131ad..11d656eb25 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1153,12 +1153,15 @@ impl Tenant { let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn())); while let Some((tline, end_lsn)) = tline_ending_at { let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await; - if !tline.is_previous_heatmap_active() { + // Another unearchived timeline might have generated a heatmap for this ancestor. + // If the current branch point greater than the previous one use the the heatmap + // we just generated - it should include more layers. 
+ if !tline.should_keep_previous_heatmap(end_lsn) { tline .previous_heatmap .store(Some(Arc::new(unarchival_heatmap))); } else { - tracing::info!("Previous heatmap still active. Dropping unarchival heatmap.") + tracing::info!("Previous heatmap preferred. Dropping unarchival heatmap.") } match tline.ancestor_timeline() { @@ -1939,6 +1942,7 @@ impl Tenant { hs.0.remove(&timeline_id).map(|h| PreviousHeatmap::Active { heatmap: h, read_at: hs.1, + end_lsn: None, }) }); part_downloads.spawn( diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 662088fbde..851f84f603 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -442,6 +442,8 @@ pub(crate) enum PreviousHeatmap { Active { heatmap: HeatMapTimeline, read_at: std::time::Instant, + // End LSN covered by the heatmap if known + end_lsn: Option, }, Obsolete, } @@ -3570,12 +3572,16 @@ impl Timeline { Ok(layer) } - pub(super) fn is_previous_heatmap_active(&self) -> bool { - self.previous_heatmap - .load() - .as_ref() - .map(|prev| matches!(**prev, PreviousHeatmap::Active { .. })) - .unwrap_or(false) + pub(super) fn should_keep_previous_heatmap(&self, new_heatmap_end_lsn: Lsn) -> bool { + let crnt = self.previous_heatmap.load(); + match crnt.as_deref() { + Some(PreviousHeatmap::Active { end_lsn, .. }) => match end_lsn { + Some(crnt_end_lsn) => *crnt_end_lsn > new_heatmap_end_lsn, + None => true, + }, + Some(PreviousHeatmap::Obsolete) => false, + None => false, + } } /// The timeline heatmap is a hint to secondary locations from the primary location, @@ -3603,26 +3609,26 @@ impl Timeline { // heatamp. 
let previous_heatmap = self.previous_heatmap.load(); let visible_non_resident = match previous_heatmap.as_deref() { - Some(PreviousHeatmap::Active { heatmap, read_at }) => { - Some(heatmap.layers.iter().filter_map(|hl| { - let desc: PersistentLayerDesc = hl.name.clone().into(); - let layer = guard.try_get_from_key(&desc.key())?; + Some(PreviousHeatmap::Active { + heatmap, read_at, .. + }) => Some(heatmap.layers.iter().filter_map(|hl| { + let desc: PersistentLayerDesc = hl.name.clone().into(); + let layer = guard.try_get_from_key(&desc.key())?; - if layer.visibility() == LayerVisibilityHint::Covered { - return None; - } + if layer.visibility() == LayerVisibilityHint::Covered { + return None; + } - if layer.is_likely_resident() { - return None; - } + if layer.is_likely_resident() { + return None; + } - if layer.last_evicted_at().happened_after(*read_at) { - return None; - } + if layer.last_evicted_at().happened_after(*read_at) { + return None; + } - Some((desc, hl.metadata.clone(), hl.access_time)) - })) - } + Some((desc, hl.metadata.clone(), hl.access_time)) + })), Some(PreviousHeatmap::Obsolete) => None, None => None, }; @@ -3709,6 +3715,7 @@ impl Timeline { PreviousHeatmap::Active { heatmap, read_at: Instant::now(), + end_lsn: Some(end_lsn), } } @@ -7046,6 +7053,7 @@ mod tests { .store(Some(Arc::new(PreviousHeatmap::Active { heatmap: heatmap.clone(), read_at: std::time::Instant::now(), + end_lsn: None, }))); // Generate a new heatmap and assert that it contains the same layers as the old one. 
@@ -7148,6 +7156,7 @@ mod tests { .store(Some(Arc::new(PreviousHeatmap::Active { heatmap: heatmap.clone(), read_at: std::time::Instant::now(), + end_lsn: None, }))); // Evict all the layers in the previous heatmap diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 184c830464..6209b63de4 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -32,6 +32,7 @@ impl HeatmapLayersDownloader { fn new( timeline: Arc, concurrency: usize, + recurse: bool, ctx: RequestContext, ) -> Result { let tl_guard = timeline.gate.enter().map_err(|_| ApiError::Cancelled)?; @@ -98,6 +99,20 @@ impl HeatmapLayersDownloader { }, _ = cancel.cancelled() => { tracing::info!("Heatmap layers download cancelled"); + return; + } + } + + if recurse { + if let Some(ancestor) = timeline.ancestor_timeline() { + let ctx = ctx.attached_child(); + let res = + ancestor.start_heatmap_layers_download(concurrency, recurse, &ctx); + if let Err(err) = res { + tracing::info!( + "Failed to start heatmap layers download for ancestor: {err}" + ); + } } } } @@ -140,14 +155,20 @@ impl HeatmapLayersDownloader { } impl Timeline { - pub(crate) async fn start_heatmap_layers_download( + pub(crate) fn start_heatmap_layers_download( self: &Arc, concurrency: usize, + recurse: bool, ctx: &RequestContext, ) -> Result<(), ApiError> { let mut locked = self.heatmap_layers_downloader.lock().unwrap(); if locked.as_ref().map(|dl| dl.is_complete()).unwrap_or(true) { - let dl = HeatmapLayersDownloader::new(self.clone(), concurrency, ctx.attached_child())?; + let dl = HeatmapLayersDownloader::new( + self.clone(), + concurrency, + recurse, + ctx.attached_child(), + )?; *locked = Some(dl); Ok(()) } else { diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index de4d45adbe..64f0be3c23 100644 --- a/storage_controller/src/http.rs +++ 
b/storage_controller/src/http.rs @@ -524,9 +524,10 @@ async fn handle_tenant_timeline_download_heatmap_layers( let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; let concurrency: Option = parse_query_param(&req, "concurrency")?; + let recurse = parse_query_param(&req, "recurse")?.unwrap_or(false); service - .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency) + .tenant_timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency, recurse) .await?; json_response(StatusCode::OK, ()) diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs index e9c54414a3..d6127c355a 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -281,13 +281,19 @@ impl PageserverClient { tenant_shard_id: TenantShardId, timeline_id: TimelineId, concurrency: Option, + recurse: bool, ) -> Result<()> { measured_request!( "download_heatmap_layers", crate::metrics::Method::Post, &self.node_id_label, self.inner - .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency) + .timeline_download_heatmap_layers( + tenant_shard_id, + timeline_id, + concurrency, + recurse + ) .await ) } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 91ce4b83e0..9ba9504718 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -3774,6 +3774,7 @@ impl Service { tenant_shard_id: TenantShardId, timeline_id: TimelineId, concurrency: Option, + recurse: bool, ) -> Result<(), ApiError> { let _tenant_lock = trace_shared_lock( &self.tenant_op_locks, @@ -3811,7 +3812,12 @@ impl Service { targets, |tenant_shard_id, client| async move { client - .timeline_download_heatmap_layers(tenant_shard_id, timeline_id, concurrency) + .timeline_download_heatmap_layers( + tenant_shard_id, + timeline_id, + concurrency, + recurse, + ) .await }, 1, diff --git 
a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 5159ad4e3b..73c8406237 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -2469,12 +2469,21 @@ class NeonStorageController(MetricsGetter, LogUtils): response.raise_for_status() return [TenantShardId.parse(tid) for tid in response.json()["updated"]] - def download_heatmap_layers(self, tenant_shard_id: TenantShardId, timeline_id: TimelineId): + def download_heatmap_layers( + self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, recurse: bool | None = None + ): + url = ( + f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers" + ) + if recurse is not None: + url = url + f"?recurse={str(recurse).lower()}" + response = self.request( "POST", - f"{self.api}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/download_heatmap_layers", + url, headers=self.headers(TokenScope.ADMIN), ) + response.raise_for_status() def __enter__(self) -> Self: diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index a9b897b741..ab0f00db1c 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -938,9 +938,12 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): # Expect lots of layers assert len(ps_attached.list_layers(tenant_id, timeline_id)) > 10 - # Simulate large data by making layer downloads artifically slow for ps in env.pageservers: + # Simulate large data by making layer downloads artifically slow ps.http_client().configure_failpoints([("secondary-layer-download-sleep", "return(1000)")]) + # Make the initial logical size calculation lie. Otherwise it on demand downloads + # layers and makes accounting difficult. 
+ ps.http_client().configure_failpoints(("skip-logical-size-calculation", "return")) def timeline_heatmap(tlid): assert env.pageserver_remote_storage is not None @@ -952,21 +955,6 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): raise RuntimeError(f"No heatmap for timeline: {tlid}") - # Upload a heatmap, so that secondaries have something to download - ps_attached.http_client().tenant_heatmap_upload(tenant_id) - heatmap_before_migration = timeline_heatmap(timeline_id) - - # This has no chance to succeed: we have lots of layers and each one takes at least 1000ms. - # However, it pulls the heatmap, which will be important later. - http_client = env.storage_controller.pageserver_api() - (status, progress) = http_client.tenant_secondary_download(tenant_id, wait_ms=4000) - assert status == 202 - assert progress["heatmap_mtime"] is not None - assert progress["layers_downloaded"] > 0 - assert progress["bytes_downloaded"] > 0 - assert progress["layers_total"] > progress["layers_downloaded"] - assert progress["bytes_total"] > progress["bytes_downloaded"] - env.storage_controller.allowed_errors.extend( [ ".*Timed out.*downloading layers.*", @@ -975,6 +963,7 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): # Use a custom configuration that gives up earlier than usual. # We can't hydrate everything anyway because of the failpoints. + # Implicitly, this also uploads a heatmap from the current attached location. config = StorageControllerMigrationConfig( secondary_warmup_timeout="5s", secondary_download_request_timeout="2s" ) @@ -988,22 +977,17 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): ps_secondary.http_client().tenant_heatmap_upload(tenant_id) heatmap_after_migration = timeline_heatmap(timeline_id) - assert len(heatmap_before_migration["layers"]) > 0 + local_layers = ps_secondary.list_layers(tenant_id, timeline_id) + # We download 1 layer per second and give up within 5 seconds. 
+ assert len(local_layers) < 10 after_migration_heatmap_layers_count = len(heatmap_after_migration["layers"]) - assert len(heatmap_before_migration["layers"]) <= after_migration_heatmap_layers_count - log.info(f"Heatmap size after cold migration is {after_migration_heatmap_layers_count}") env.storage_controller.download_heatmap_layers( TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id ) - # Now simulate the case where a child timeline is archived, parent layers - # are evicted and the child is unarchived. When the child is unarchived, - # itself and the parent update their heatmaps to contain layers needed by the - # child. One can warm up the timeline hierarchy since the heatmaps are ready. - def all_layers_downloaded(expected_layer_count: int): local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id)) @@ -1011,8 +995,9 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): assert local_layers_count >= expected_layer_count wait_until(lambda: all_layers_downloaded(after_migration_heatmap_layers_count)) - ps_secondary.http_client().tenant_heatmap_upload(tenant_id) + # Read everything and make sure that we're not downloading anything extra. + # All hot layers should be available locally now. before = ( ps_secondary.http_client() .get_metrics() @@ -1030,6 +1015,11 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): workload.stop() assert before == after + # Now simulate the case where a child timeline is archived, parent layers + # are evicted and the child is unarchived. When the child is unarchived, + # itself and the parent update their heatmaps to contain layers needed by the + # child. One can warm up the timeline hierarchy since the heatmaps are ready. 
+ def check_archival_state(state: TimelineArchivalState, tline): timelines = ( timeline["timeline_id"] @@ -1064,6 +1054,6 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): assert expected_locally > 0 env.storage_controller.download_heatmap_layers( - TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id + TenantShardId(tenant_id, shard_number=0, shard_count=0), child_timeline_id, recurse=True ) wait_until(lambda: all_layers_downloaded(expected_locally)) From 7607686f25d28c7ad0ff338a5f196b31d6d2c82d Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Fri, 28 Feb 2025 12:16:22 +0100 Subject: [PATCH 055/207] Make test extensions upgrade work with absent images (#11036) ## Problem CI does not pass for the compute release due to the absence of some images ## Summary of changes Now we use the images from the old non-compute releases for non-compute images --- .github/workflows/build_and_test.yml | 2 +- docker-compose/test_extensions_upgrade.sh | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 639c258c5c..fb6da2f173 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -831,7 +831,7 @@ jobs: || needs.meta.outputs.run-kind == 'pr' && needs.meta.outputs.build-tag || needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release }} - TEST_EXTENSIONS_TAG: latest + TEST_EXTENSIONS_TAG: ${{ needs.meta.outputs.previous-compute-release }} NEW_COMPUTE_TAG: ${{ needs.meta.outputs.build-tag }} OLD_COMPUTE_TAG: ${{ needs.meta.outputs.previous-compute-release }} run: ./docker-compose/test_extensions_upgrade.sh diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index 6e6c41538d..57c0182162 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ 
b/docker-compose/test_extensions_upgrade.sh @@ -58,7 +58,7 @@ function check_timeline() { # Accepts the tag for the compute node and the timeline as parameters. function restart_compute() { docker compose down compute compute_is_ready - COMPUTE_TAG=${1} TAG=${OLD_COMPUTE_TAG} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready + COMPUTE_TAG=${1} TENANT_ID=${tenant_id} TIMELINE_ID=${2} docker compose up --quiet-pull -d --build compute compute_is_ready wait_for_ready check_timeline ${2} } @@ -82,7 +82,7 @@ EXTENSIONS='[ {"extname": "pg_repack", "extdir": "pg_repack-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) -TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d +COMPUTE_TAG=${NEW_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" @@ -90,7 +90,7 @@ create_extensions "${EXTNAMES}" query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')" new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query") docker compose --profile test-extensions down -TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate +COMPUTE_TAG=${OLD_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" From 7c53fd0d56083cb4e0becff87292d3d0406943eb Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: 
Fri, 28 Feb 2025 13:31:52 +0100 Subject: [PATCH 056/207] refactor(page_service / timeline::handle): the GateGuard need not be a special case (#11030) # Changes While working on - https://github.com/neondatabase/neon/pull/7202 I found myself needing to cache another expensive Arc::clone inside the timeline::handle::Cache by wrapping it in another Arc. Before this PR, it seemed like the only expensive thing we were caching was the connection handler tasks' clone of `Arc`. But in fact the GateGuard was another such thing, but it was special-cased in the implementation. So, this refactoring PR de-special-cases the GateGuard. # Performance With this PR we are doing strictly _fewer_ operations per `Cache::get`. The reason is that we wrap the entire `Types::Timeline` into one Arc. Before this PR, it was a separate Arc around the Arc and one around the Arc. With this PR, we avoid an allocation per cached item, namely, the separate Arc around the GateGuard. This PR does not change the amount of shared mutable state. So, all in all, it should be a net positive, albeit probably not noticeable with our small non-NUMA instances and generally high CPU usage per request. # Reviewing To understand the refactoring logistics, look at the changes to the unit test types first. Then read the improved module doc comment. Then the remaining changes. In the future, we could rename things to be even more generic. For example, `Types::TenantMgr` could really be a `Types::Resolver`. And `Types::Timeline` should, to avoid constant confusion in the doc comment, be called `Types::Cached` or `Types::Resolved`. Because the `handle` module, after this PR, really doesn't care that we're using it for storing Arc's and GateGuards. Then again, specificity is sometimes more useful than being generic. And writing the module doc comment in a totally generic way would probably also be more confusing than helpful.
--- pageserver/src/page_service.rs | 46 ++++-- pageserver/src/tenant/timeline/handle.rs | 195 ++++++++++++----------- 2 files changed, 132 insertions(+), 109 deletions(-) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 8972515163..603a5f65aa 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -392,10 +392,6 @@ impl TimelineHandles { .await .map_err(|e| match e { timeline::handle::GetError::TenantManager(e) => e, - timeline::handle::GetError::TimelineGateClosed => { - trace!("timeline gate closed"); - GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) - } timeline::handle::GetError::PerTimelineStateShutDown => { trace!("per-timeline state shut down"); GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) @@ -422,24 +418,33 @@ pub(crate) struct TenantManagerTypes; impl timeline::handle::Types for TenantManagerTypes { type TenantManagerError = GetActiveTimelineError; type TenantManager = TenantManagerWrapper; - type Timeline = Arc; + type Timeline = TenantManagerCacheItem; } -impl timeline::handle::ArcTimeline for Arc { - fn gate(&self) -> &utils::sync::gate::Gate { - &self.gate - } +pub(crate) struct TenantManagerCacheItem { + pub(crate) timeline: Arc, + #[allow(dead_code)] // we store it to keep the gate open + pub(crate) gate_guard: GateGuard, +} +impl std::ops::Deref for TenantManagerCacheItem { + type Target = Arc; + fn deref(&self) -> &Self::Target { + &self.timeline + } +} + +impl timeline::handle::Timeline for TenantManagerCacheItem { fn shard_timeline_id(&self) -> timeline::handle::ShardTimelineId { - Timeline::shard_timeline_id(self) + Timeline::shard_timeline_id(&self.timeline) } fn per_timeline_state(&self) -> &timeline::handle::PerTimelineState { - &self.handles + &self.timeline.handles } fn get_shard_identity(&self) -> &pageserver_api::shard::ShardIdentity { - Timeline::get_shard_identity(self) + Timeline::get_shard_identity(&self.timeline) } } @@ -448,7 +453,7 @@ 
impl timeline::handle::TenantManager for TenantManagerWrappe &self, timeline_id: TimelineId, shard_selector: ShardSelector, - ) -> Result, GetActiveTimelineError> { + ) -> Result { let tenant_id = self.tenant_id.get().expect("we set this in get()"); let timeout = ACTIVE_TENANT_TIMEOUT; let wait_start = Instant::now(); @@ -491,7 +496,20 @@ impl timeline::handle::TenantManager for TenantManagerWrappe let timeline = tenant_shard .get_timeline(timeline_id, true) .map_err(GetActiveTimelineError::Timeline)?; - Ok(timeline) + + let gate_guard = match timeline.gate.enter() { + Ok(guard) => guard, + Err(_) => { + return Err(GetActiveTimelineError::Timeline( + GetTimelineError::ShuttingDown, + )); + } + }; + + Ok(TenantManagerCacheItem { + timeline, + gate_guard, + }) } } diff --git a/pageserver/src/tenant/timeline/handle.rs b/pageserver/src/tenant/timeline/handle.rs index 67fb89c433..809b350f38 100644 --- a/pageserver/src/tenant/timeline/handle.rs +++ b/pageserver/src/tenant/timeline/handle.rs @@ -1,5 +1,4 @@ -//! An efficient way to keep the timeline gate open without preventing -//! timeline shutdown for longer than a single call to a timeline method. +//! A cache for [`crate::tenant::mgr`]+`Tenant::get_timeline`+`Timeline::gate.enter()`. //! //! # Motivation //! @@ -19,27 +18,32 @@ //! we hold the Timeline gate open while we're invoking the method on the //! Timeline object. //! -//! However, we want to avoid the overhead of entering the gate for every -//! method invocation. -//! -//! Further, for shard routing, we want to avoid calling the tenant manager to -//! resolve the shard for every request. Instead, we want to cache the -//! routing result so we can bypass the tenant manager for all subsequent requests -//! that get routed to that shard. +//! We want to avoid the overhead of doing, for each incoming request, +//! - tenant manager lookup (global rwlock + btreemap lookup for shard routing) +//! - cloning the `Arc` out of the tenant manager so we can +//! 
release the mgr rwlock before doing any request processing work +//! - re-entering the Timeline gate for each Timeline method invocation. //! //! Regardless of how we accomplish the above, it should not //! prevent the Timeline from shutting down promptly. //! +//! //! # Design //! //! ## Data Structures //! -//! There are three user-facing data structures: +//! There are two concepts expressed as associated types in the `Types` trait: +//! - `TenantManager`: the thing that performs the expensive work. It produces +//! a `Timeline` object, which is the other associated type. +//! - `Timeline`: the item that we cache for fast (TenantTimelineId,ShardSelector) lookup. +//! +//! There are three user-facing data structures exposed by this module: //! - `PerTimelineState`: a struct embedded into each Timeline struct. Lifetime == Timeline lifetime. //! - `Cache`: a struct private to each connection handler; Lifetime == connection lifetime. -//! - `Handle`: a smart pointer that holds the Timeline gate open and derefs to `&Timeline`. +//! - `Handle`: a smart pointer that derefs to the Types::Timeline. //! - `WeakHandle`: downgrade of a `Handle` that does not keep the gate open, but allows -//! trying to ugprade back to a `Handle`, guaranteeing it's the same `Timeline` *object*. +//! trying to upgrade back to a `Handle`. If successful, a re-upgraded Handle will always +//! point to the same cached `Types::Timeline`. Upgrades never invoke the `TenantManager`. //! //! Internally, there is 0 or 1 `HandleInner` per `(Cache,Timeline)`. //! Since Cache:Connection is 1:1, there is 0 or 1 `HandleInner` per `(Connection,Timeline)`. @@ -64,11 +68,14 @@ //! //! To dispatch a request, the page service connection calls `Cache::get`. //! -//! A cache miss means we consult the tenant manager for shard routing, -//! resulting in an `Arc`. We enter its gate _once_ and store it in the the -//! `Arc>>`. A weak ref is stored in the `Cache` +//!
A cache miss means we call Types::TenantManager::resolve for shard routing, +//! cloning the `Arc` out of it, and entering the gate. The result of +//! resolve() is the object we want to cache, and return `Handle`s to for subsequent `Cache::get` calls. +//! +//! We wrap the object returned from resolve() in an `Arc` and store that inside the +//! `Arc>>`. A weak ref to the HandleInner is stored in the `Cache` //! and a strong ref in the `PerTimelineState`. -//! A strong ref is returned wrapped in a `Handle`. +//! Another strong ref is returned wrapped in a `Handle`. //! //! For subsequent requests, `Cache::get` will perform a "fast path" shard routing //! and find the weak ref in the cache. @@ -78,51 +85,51 @@ //! While a request is batching, the `Handle` is downgraded to a `WeakHandle`. //! When the batch is ready to be executed, the `WeakHandle` is upgraded back to a `Handle` //! and the request handler dispatches the request to the right `>::$request_method`. -//! It then drops the `Handle`, which drops the `Arc`. +//! It then drops the `Handle`, and thus the `Arc>` inside it. //! //! # Performance //! //! Remember from the introductory section: //! -//! > However, we want to avoid the overhead of entering the gate for every -//! > method invocation. +//! > We want to avoid the overhead of doing, for each incoming request, +//! > - tenant manager lookup (global rwlock + btreemap lookup for shard routing) +//! > - cloning the `Arc` out of the tenant manager so we can +//! > release the mgr rwlock before doing any request processing work +//! > - re-entering the Timeline gate for each Timeline method invocation. //! -//! Why do we want to avoid that? -//! Because the gate is a shared location in memory and entering it involves -//! bumping refcounts, which leads to cache contention if done frequently -//! from multiple cores in parallel. +//! All of these boil down to some state that is either globally shared among all shards +//!
or state shared among all tasks that serve a particular timeline. +//! It is either protected by RwLock or manipulated via atomics. +//! Even atomics are costly when shared across multiple cores. +//! So, we want to avoid any permanent need for coordination between page_service tasks. //! -//! So, we only acquire the `GateGuard` once on `Cache` miss, and wrap it in an `Arc`. -//! That `Arc` is private to the `HandleInner` and hence to the connection. +//! The solution is to add indirection: we wrap the Types::Timeline object that is +//! returned by Types::TenantManager into an Arc that is private to the `HandleInner` +//! and hence to the single Cache / page_service connection. //! (Review the "Data Structures" section if that is unclear to you.) //! -//! A `WeakHandle` is a weak ref to the `HandleInner`. -//! When upgrading a `WeakHandle`, we upgrade to a strong ref to the `HandleInner` and -//! further acquire an additional strong ref to the `Arc` inside it. -//! Again, this manipulation of ref counts is is cheap because `Arc` is private to the connection. //! -//! When downgrading a `Handle` to a `WeakHandle`, we drop the `Arc`. -//! Again, this is cheap because the `Arc` is private to the connection. +//! When upgrading a `WeakHandle`, we upgrade its weak to a strong ref (of the `Mutex`), +//! lock the mutex, take out a clone of the `Arc`, and drop the Mutex. +//! The Mutex is not contended because it is private to the connection. +//! And again, the `Arc` clone is cheap because that wrapper +//! Arc's refcounts are private to the connection. +//! +//! Downgrading drops these two Arcs, which again, manipulates refcounts that are private to the connection. //! -//! In addition to the GateGuard, we need to provide `Deref` impl. -//! For this, both `Handle` need infallible access to an `Arc`. -//! We could clone the `Arc` when upgrading a `WeakHandle`, but that would cause contention -//! on the shared memory location that trakcs the refcount of the `Arc`. -//!
Instead, we wrap the `Arc` into another `Arc`. -//! so that we can clone it cheaply when upgrading a `WeakHandle`. //! //! # Shutdown //! //! The attentive reader may have noticed the following reference cycle around the `Arc`: //! //! ```text -//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Timeline +//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Types::Timeline --strong--> Timeline //! ``` //! //! Further, there is this cycle: //! //! ```text -//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> GateGuard --keepalive--> Timeline +//! Timeline --owns--> PerTimelineState --strong--> HandleInner --strong--> Types::Timeline --strong--> GateGuard --keepalive--> Timeline //! ``` //! //! The former cycle is a memory leak if not broken. @@ -135,9 +142,12 @@ //! - Timeline shutdown (=> `PerTimelineState::shutdown`) //! - Connection shutdown (=> dropping the `Cache`). //! -//! Both transition the `HandleInner` from [`HandleInner::KeepingTimelineGateOpen`] to -//! [`HandleInner::ShutDown`], which drops the only long-lived strong ref to the -//! `Arc`. +//! Both transition the `HandleInner` from [`HandleInner::Open`] to +//! [`HandleInner::ShutDown`], which drops the only long-lived +//! `Arc`. Once the last short-lived Arc +//! is dropped, the `Types::Timeline` gets dropped and thereby +//! the `GateGuard` and the `Arc` that it stores, +//! thereby breaking both cycles. //! //! `PerTimelineState::shutdown` drops all the `HandleInners` it contains, //! thereby breaking the cycle. @@ -216,7 +226,7 @@ use crate::tenant::mgr::ShardSelector; pub(crate) trait Types: Sized + std::fmt::Debug { type TenantManagerError: Sized + std::fmt::Debug; type TenantManager: TenantManager + Sized; - type Timeline: ArcTimeline + Sized; + type Timeline: Timeline + Sized; } /// Uniquely identifies a [`Cache`] instance over the lifetime of the process. 
@@ -261,20 +271,15 @@ pub(crate) struct ShardTimelineId { /// See module-level comment. pub(crate) struct Handle { - timeline: Arc, - #[allow(dead_code)] // the field exists to keep the gate open - gate_guard: Arc, inner: Arc>>, + open: Arc, } pub(crate) struct WeakHandle { inner: Weak>>, } + enum HandleInner { - KeepingTimelineGateOpen { - #[allow(dead_code)] - gate_guard: Arc, - timeline: Arc, - }, + Open(Arc), ShutDown, } @@ -307,8 +312,7 @@ pub(crate) trait TenantManager { } /// Abstract view of an [`Arc`], for testability. -pub(crate) trait ArcTimeline: Clone { - fn gate(&self) -> &utils::sync::gate::Gate; +pub(crate) trait Timeline { fn shard_timeline_id(&self) -> ShardTimelineId; fn get_shard_identity(&self) -> &ShardIdentity; fn per_timeline_state(&self) -> &PerTimelineState; @@ -318,7 +322,6 @@ pub(crate) trait ArcTimeline: Clone { #[derive(Debug)] pub(crate) enum GetError { TenantManager(T::TenantManagerError), - TimelineGateClosed, PerTimelineStateShutDown, } @@ -434,21 +437,9 @@ impl Cache { } trace!("creating new HandleInner"); - let handle_inner_arc = Arc::new(Mutex::new(HandleInner::KeepingTimelineGateOpen { - gate_guard: Arc::new( - // this enter() is expensive in production code because - // it hits the global Arc::gate refcounts - match timeline.gate().enter() { - Ok(guard) => guard, - Err(_) => { - return Err(GetError::TimelineGateClosed); - } - }, - ), - // this clone is expensive in production code because - // it hits the global Arc::clone refcounts - timeline: Arc::new(timeline.clone()), - })); + let timeline = Arc::new(timeline); + let handle_inner_arc = + Arc::new(Mutex::new(HandleInner::Open(Arc::clone(&timeline)))); let handle_weak = WeakHandle { inner: Arc::downgrade(&handle_inner_arc), }; @@ -503,18 +494,10 @@ impl WeakHandle { }; let lock_guard = inner.lock().expect("poisoned"); match &*lock_guard { - HandleInner::KeepingTimelineGateOpen { - timeline, - gate_guard, - } => { - let gate_guard = Arc::clone(gate_guard); - let timeline = 
Arc::clone(timeline); + HandleInner::Open(open) => { + let open = Arc::clone(open); drop(lock_guard); - Ok(Handle { - timeline, - gate_guard, - inner, - }) + Ok(Handle { open, inner }) } HandleInner::ShutDown => Err(HandleUpgradeError::ShutDown), } @@ -528,7 +511,7 @@ impl WeakHandle { impl std::ops::Deref for Handle { type Target = T::Timeline; fn deref(&self) -> &Self::Target { - &self.timeline + &self.open } } @@ -545,7 +528,7 @@ impl PerTimelineState { /// to the [`Types::Timeline`] that embeds this per-timeline state. /// Even if [`TenantManager::resolve`] would still resolve to it. /// - /// Already-alive [`Handle`]s for will remain open, usable, and keeping the [`ArcTimeline`] alive. + /// Already-alive [`Handle`]s for will remain open, usable, and keeping the [`Types::Timeline`] alive. /// That's ok because they're short-lived. See module-level comment for details. #[instrument(level = "trace", skip_all)] pub(super) fn shutdown(&self) { @@ -611,7 +594,7 @@ impl Drop for Cache { impl HandleInner { fn shutdown(&mut self) -> Option> { match std::mem::replace(self, HandleInner::ShutDown) { - HandleInner::KeepingTimelineGateOpen { timeline, .. 
} => Some(timeline), + HandleInner::Open(timeline) => Some(timeline), HandleInner::ShutDown => { // Duplicate shutdowns are possible because both Cache::drop and PerTimelineState::shutdown // may do it concurrently, but locking rules disallow holding per-timeline-state lock and @@ -631,6 +614,7 @@ mod tests { use pageserver_api::reltag::RelTag; use pageserver_api::shard::ShardStripeSize; use utils::shard::ShardCount; + use utils::sync::gate::GateGuard; use super::*; @@ -641,7 +625,7 @@ mod tests { impl Types for TestTypes { type TenantManagerError = anyhow::Error; type TenantManager = StubManager; - type Timeline = Arc; + type Timeline = Entered; } struct StubManager { @@ -656,17 +640,19 @@ mod tests { myself: Weak, } + struct Entered { + timeline: Arc, + #[allow(dead_code)] // it's stored here to keep the gate open + gate_guard: Arc, + } + impl StubTimeline { fn getpage(&self) { // do nothing } } - impl ArcTimeline for Arc { - fn gate(&self) -> &utils::sync::gate::Gate { - &self.gate - } - + impl Timeline for Entered { fn shard_timeline_id(&self) -> ShardTimelineId { ShardTimelineId { shard_index: self.shard.shard_index(), @@ -688,20 +674,34 @@ mod tests { &self, timeline_id: TimelineId, shard_selector: ShardSelector, - ) -> anyhow::Result> { + ) -> anyhow::Result { for timeline in &self.shards { if timeline.id == timeline_id { + let enter_gate = || { + let gate_guard = timeline.gate.enter()?; + let gate_guard = Arc::new(gate_guard); + anyhow::Ok(gate_guard) + }; match &shard_selector { ShardSelector::Zero if timeline.shard.is_shard_zero() => { - return Ok(Arc::clone(timeline)); + return Ok(Entered { + timeline: Arc::clone(timeline), + gate_guard: enter_gate()?, + }); } ShardSelector::Zero => continue, ShardSelector::Page(key) if timeline.shard.is_key_local(key) => { - return Ok(Arc::clone(timeline)); + return Ok(Entered { + timeline: Arc::clone(timeline), + gate_guard: enter_gate()?, + }); } ShardSelector::Page(_) => continue, ShardSelector::Known(idx) if idx == 
&timeline.shard.shard_index() => { - return Ok(Arc::clone(timeline)); + return Ok(Entered { + timeline: Arc::clone(timeline), + gate_guard: enter_gate()?, + }); } ShardSelector::Known(_) => continue, } @@ -711,6 +711,13 @@ mod tests { } } + impl std::ops::Deref for Entered { + type Target = StubTimeline; + fn deref(&self) -> &Self::Target { + &self.timeline + } + } + #[tokio::test(start_paused = true)] async fn test_timeline_shutdown() { crate::tenant::harness::setup_logging(); @@ -1038,7 +1045,6 @@ mod tests { let key = DBDIR_KEY; // Simulate 10 connections that's opened, used, and closed - let mut used_handles = vec![]; for _ in 0..10 { let mut cache = Cache::::default(); let handle = { @@ -1050,7 +1056,6 @@ mod tests { handle }; handle.getpage(); - used_handles.push(Arc::downgrade(&handle.timeline)); } // No handles exist, thus gates are closed and don't require shutdown. From c7ff3c4c9bd8b8f3aec9eb69551527ad3b105cce Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 28 Feb 2025 15:06:56 +0100 Subject: [PATCH 057/207] safekeeper: downgrade interpreted reader errors (#11034) ## Problem This `critical!` could fire on IO errors, which is just noisy. Resolves #11027. ## Summary of changes Downgrade to error, except for decode errors. These could be either data corruption or a bug, but seem worth investigating either way. --- safekeeper/src/send_interpreted_wal.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index e196f91d3c..2c1c73c25c 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -287,7 +287,13 @@ impl InterpretedWalReader { reader .run_impl(start_pos) .await - .inspect_err(|err| critical!("failed to read WAL record: {err:?}")) + .inspect_err(|err| match err { + // TODO: we may want to differentiate these errors further. 
+ InterpretedWalReaderError::Decode(_) => { + critical!("failed to decode WAL record: {err:?}"); + } + err => error!("failed to read WAL record: {err}"), + }) } .instrument(info_span!("interpreted wal reader")), ); @@ -347,10 +353,12 @@ impl InterpretedWalReader { metric.dec(); } - if let Err(err) = self.run_impl(start_pos).await { - critical!("failed to read WAL record: {err:?}"); - } else { - info!("interpreted wal reader exiting"); + match self.run_impl(start_pos).await { + Err(err @ InterpretedWalReaderError::Decode(_)) => { + critical!("failed to decode WAL record: {err:?}"); + } + Err(err) => error!("failed to read WAL record: {err}"), + Ok(()) => info!("interpreted wal reader exiting"), } Err(CopyStreamHandlerEnd::Other(anyhow!( From d9ced89ec074dd49c9f36177df9bc6c36c315133 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 28 Feb 2025 14:20:25 +0000 Subject: [PATCH 058/207] feat(proxy): require TLS to compute if prompted by cplane (#10717) https://github.com/neondatabase/cloud/issues/23008 For TLS between proxy and compute, we are using an internally provisioned CA to sign the compute certificates. This change ensures that proxy will load them from a supplied env var pointing to the correct file - this file and env var will be configured later, using a kubernetes secret. Control plane responds with a `server_name` field if and only if the compute uses TLS. This server name is the name we use to validate the certificate. Control plane still sends us the IP to connect to as well (to support overlay IP). To support this change, I had to split `host` and `host_addr` into separate fields. `host_addr` is used when present, bypassing `lookup_host` (which is what happens in production); `host` is then only used for the TLS connection. There's no blocker to merging this. The code paths will not be triggered until the new control plane is deployed and the `enableTLS` compute flag is enabled on a project.
--- .../proxy/tokio-postgres2/src/cancel_query.rs | 9 ++- libs/proxy/tokio-postgres2/src/client.rs | 2 + libs/proxy/tokio-postgres2/src/config.rs | 12 ++++ libs/proxy/tokio-postgres2/src/connect.rs | 8 ++- .../tokio-postgres2/src/connect_socket.rs | 12 +++- proxy/src/auth/backend/local.rs | 1 + proxy/src/compute.rs | 35 ++++++---- .../control_plane/client/cplane_proxy_v1.rs | 22 +++++- proxy/src/control_plane/client/mock.rs | 22 ++++-- proxy/src/control_plane/messages.rs | 6 ++ proxy/src/proxy/connect_compute.rs | 5 +- proxy/src/proxy/tests/mod.rs | 1 + proxy/src/serverless/backend.rs | 69 ++++++++++++++++--- proxy/src/serverless/conn_pool.rs | 1 + proxy/src/serverless/http_conn_pool.rs | 5 +- proxy/src/tls/client_config.rs | 40 +++++++++-- test_runner/fixtures/neon_fixtures.py | 2 + 17 files changed, 206 insertions(+), 46 deletions(-) diff --git a/libs/proxy/tokio-postgres2/src/cancel_query.rs b/libs/proxy/tokio-postgres2/src/cancel_query.rs index b65fb571e6..0bdad0b554 100644 --- a/libs/proxy/tokio-postgres2/src/cancel_query.rs +++ b/libs/proxy/tokio-postgres2/src/cancel_query.rs @@ -34,8 +34,13 @@ where .make_tls_connect(hostname) .map_err(|e| Error::tls(e.into()))?; - let socket = - connect_socket::connect_socket(&config.host, config.port, config.connect_timeout).await?; + let socket = connect_socket::connect_socket( + config.host_addr, + &config.host, + config.port, + config.connect_timeout, + ) + .await?; cancel_query_raw::cancel_query_raw(socket, ssl_mode, tls, process_id, secret_key).await } diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs index 39b1db75da..c70cb598de 100644 --- a/libs/proxy/tokio-postgres2/src/client.rs +++ b/libs/proxy/tokio-postgres2/src/client.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::fmt; +use std::net::IpAddr; use std::sync::Arc; use std::task::{Context, Poll}; use std::time::Duration; @@ -137,6 +138,7 @@ impl InnerClient { #[derive(Clone, Serialize, Deserialize)] pub 
struct SocketConfig { + pub host_addr: Option, pub host: Host, pub port: u16, pub connect_timeout: Option, diff --git a/libs/proxy/tokio-postgres2/src/config.rs b/libs/proxy/tokio-postgres2/src/config.rs index 4c25491b67..978d348741 100644 --- a/libs/proxy/tokio-postgres2/src/config.rs +++ b/libs/proxy/tokio-postgres2/src/config.rs @@ -1,5 +1,6 @@ //! Connection configuration. +use std::net::IpAddr; use std::time::Duration; use std::{fmt, str}; @@ -65,6 +66,7 @@ pub enum AuthKeys { /// Connection configuration. #[derive(Clone, PartialEq, Eq)] pub struct Config { + pub(crate) host_addr: Option, pub(crate) host: Host, pub(crate) port: u16, @@ -83,6 +85,7 @@ impl Config { /// Creates a new configuration. pub fn new(host: String, port: u16) -> Config { Config { + host_addr: None, host: Host::Tcp(host), port, password: None, @@ -163,6 +166,15 @@ impl Config { self } + pub fn set_host_addr(&mut self, addr: IpAddr) -> &mut Config { + self.host_addr = Some(addr); + self + } + + pub fn get_host_addr(&self) -> Option { + self.host_addr + } + /// Sets the SSL configuration. /// /// Defaults to `prefer`. 
diff --git a/libs/proxy/tokio-postgres2/src/connect.rs b/libs/proxy/tokio-postgres2/src/connect.rs index d2bd0dfbcd..7c3a358bba 100644 --- a/libs/proxy/tokio-postgres2/src/connect.rs +++ b/libs/proxy/tokio-postgres2/src/connect.rs @@ -1,3 +1,5 @@ +use std::net::IpAddr; + use postgres_protocol2::message::backend::Message; use tokio::net::TcpStream; use tokio::sync::mpsc; @@ -25,13 +27,14 @@ where .make_tls_connect(hostname) .map_err(|e| Error::tls(e.into()))?; - match connect_once(&config.host, config.port, tls, config).await { + match connect_once(config.host_addr, &config.host, config.port, tls, config).await { Ok((client, connection)) => Ok((client, connection)), Err(e) => Err(e), } } async fn connect_once( + host_addr: Option, host: &Host, port: u16, tls: T, @@ -40,7 +43,7 @@ async fn connect_once( where T: TlsConnect, { - let socket = connect_socket(host, port, config.connect_timeout).await?; + let socket = connect_socket(host_addr, host, port, config.connect_timeout).await?; let RawConnection { stream, parameters, @@ -50,6 +53,7 @@ where } = connect_raw(socket, tls, config).await?; let socket_config = SocketConfig { + host_addr, host: host.clone(), port, connect_timeout: config.connect_timeout, diff --git a/libs/proxy/tokio-postgres2/src/connect_socket.rs b/libs/proxy/tokio-postgres2/src/connect_socket.rs index 15411f7ef3..8c7d300451 100644 --- a/libs/proxy/tokio-postgres2/src/connect_socket.rs +++ b/libs/proxy/tokio-postgres2/src/connect_socket.rs @@ -1,5 +1,6 @@ use std::future::Future; use std::io; +use std::net::{IpAddr, SocketAddr}; use std::time::Duration; use tokio::net::{self, TcpStream}; @@ -9,15 +10,20 @@ use crate::Error; use crate::config::Host; pub(crate) async fn connect_socket( + host_addr: Option, host: &Host, port: u16, connect_timeout: Option, ) -> Result { match host { Host::Tcp(host) => { - let addrs = net::lookup_host((&**host, port)) - .await - .map_err(Error::connect)?; + let addrs = match host_addr { + Some(addr) => 
vec![SocketAddr::new(addr, port)], + None => net::lookup_host((&**host, port)) + .await + .map_err(Error::connect)? + .collect(), + }; let mut last_err = None; diff --git a/proxy/src/auth/backend/local.rs b/proxy/src/auth/backend/local.rs index 9c3a3772cd..7a6dceb194 100644 --- a/proxy/src/auth/backend/local.rs +++ b/proxy/src/auth/backend/local.rs @@ -35,6 +35,7 @@ impl LocalBackend { endpoint_id: EndpointIdTag::get_interner().get_or_intern("local"), project_id: ProjectIdTag::get_interner().get_or_intern("local"), branch_id: BranchIdTag::get_interner().get_or_intern("local"), + compute_id: "local".into(), cold_start_info: ColdStartInfo::WarmCached, }, }, diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 5447a4a4c0..2560187608 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -1,3 +1,4 @@ +use std::fmt::Debug; use std::io; use std::net::SocketAddr; use std::time::Duration; @@ -10,7 +11,7 @@ use postgres_protocol::message::backend::NoticeResponseBody; use pq_proto::StartupMessageParams; use rustls::pki_types::InvalidDnsNameError; use thiserror::Error; -use tokio::net::TcpStream; +use tokio::net::{TcpStream, lookup_host}; use tracing::{debug, error, info, warn}; use crate::auth::backend::ComputeUserInfo; @@ -180,21 +181,19 @@ impl ConnCfg { use postgres_client::config::Host; // wrap TcpStream::connect with timeout - let connect_with_timeout = |host, port| { - tokio::time::timeout(timeout, TcpStream::connect((host, port))).map( - move |res| match res { - Ok(tcpstream_connect_res) => tcpstream_connect_res, - Err(_) => Err(io::Error::new( - io::ErrorKind::TimedOut, - format!("exceeded connection timeout {timeout:?}"), - )), - }, - ) + let connect_with_timeout = |addrs| { + tokio::time::timeout(timeout, TcpStream::connect(addrs)).map(move |res| match res { + Ok(tcpstream_connect_res) => tcpstream_connect_res, + Err(_) => Err(io::Error::new( + io::ErrorKind::TimedOut, + format!("exceeded connection timeout {timeout:?}"), + )), + }) }; - let 
connect_once = |host, port| { - debug!("trying to connect to compute node at {host}:{port}"); - connect_with_timeout(host, port).and_then(|stream| async { + let connect_once = |addrs| { + debug!("trying to connect to compute node at {addrs:?}"); + connect_with_timeout(addrs).and_then(|stream| async { let socket_addr = stream.peer_addr()?; let socket = socket2::SockRef::from(&stream); // Disable Nagle's algorithm to not introduce latency between @@ -216,7 +215,12 @@ impl ConnCfg { Host::Tcp(host) => host.as_str(), }; - match connect_once(host, port).await { + let addrs = match self.0.get_host_addr() { + Some(addr) => vec![SocketAddr::new(addr, port)], + None => lookup_host((host, port)).await?.collect(), + }; + + match connect_once(&*addrs).await { Ok((sockaddr, stream)) => Ok((sockaddr, stream, host)), Err(err) => { warn!("couldn't connect to compute node at {host}:{port}: {err}"); @@ -277,6 +281,7 @@ impl ConnCfg { } = connection; tracing::Span::current().record("pid", tracing::field::display(process_id)); + tracing::Span::current().record("compute_id", tracing::field::display(&aux.compute_id)); let stream = stream.into_inner(); // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) diff --git a/proxy/src/control_plane/client/cplane_proxy_v1.rs b/proxy/src/control_plane/client/cplane_proxy_v1.rs index 977fcf4727..2765aaa462 100644 --- a/proxy/src/control_plane/client/cplane_proxy_v1.rs +++ b/proxy/src/control_plane/client/cplane_proxy_v1.rs @@ -1,5 +1,7 @@ //! Production console backend. +use std::net::IpAddr; +use std::str::FromStr; use std::sync::Arc; use std::time::Duration; @@ -274,11 +276,27 @@ impl NeonControlPlaneClient { Some(x) => x, }; + let host_addr = IpAddr::from_str(host).ok(); + + let ssl_mode = match &body.server_name { + Some(_) => SslMode::Require, + None => SslMode::Disable, + }; + let host_name = match body.server_name { + Some(host) => host, + None => host.to_owned(), + }; + // Don't set anything but host and port! 
This config will be cached. // We'll set username and such later using the startup message. // TODO: add more type safety (in progress). - let mut config = compute::ConnCfg::new(host.to_owned(), port); - config.ssl_mode(SslMode::Disable); // TLS is not configured on compute nodes. + let mut config = compute::ConnCfg::new(host_name, port); + + if let Some(addr) = host_addr { + config.set_host_addr(addr); + } + + config.ssl_mode(ssl_mode); let node = NodeInfo { config, diff --git a/proxy/src/control_plane/client/mock.rs b/proxy/src/control_plane/client/mock.rs index 7da5464aa5..ee722e839e 100644 --- a/proxy/src/control_plane/client/mock.rs +++ b/proxy/src/control_plane/client/mock.rs @@ -1,5 +1,6 @@ //! Mock console backend which relies on a user-provided postgres instance. +use std::net::{IpAddr, Ipv4Addr}; use std::str::FromStr; use std::sync::Arc; @@ -167,10 +168,22 @@ impl MockControlPlane { } async fn do_wake_compute(&self) -> Result { - let mut config = compute::ConnCfg::new( - self.endpoint.host_str().unwrap_or("localhost").to_owned(), - self.endpoint.port().unwrap_or(5432), - ); + let port = self.endpoint.port().unwrap_or(5432); + let mut config = match self.endpoint.host_str() { + None => { + let mut config = compute::ConnCfg::new("localhost".to_string(), port); + config.set_host_addr(IpAddr::V4(Ipv4Addr::LOCALHOST)); + config + } + Some(host) => { + let mut config = compute::ConnCfg::new(host.to_string(), port); + if let Ok(addr) = IpAddr::from_str(host) { + config.set_host_addr(addr); + } + config + } + }; + config.ssl_mode(postgres_client::config::SslMode::Disable); let node = NodeInfo { @@ -179,6 +192,7 @@ impl MockControlPlane { endpoint_id: (&EndpointId::from("endpoint")).into(), project_id: (&ProjectId::from("project")).into(), branch_id: (&BranchId::from("branch")).into(), + compute_id: "compute".into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, }; diff --git a/proxy/src/control_plane/messages.rs 
b/proxy/src/control_plane/messages.rs index 8d6b2e96f5..ec4554eab5 100644 --- a/proxy/src/control_plane/messages.rs +++ b/proxy/src/control_plane/messages.rs @@ -2,6 +2,7 @@ use std::fmt::{self, Display}; use measured::FixedCardinalityLabel; use serde::{Deserialize, Serialize}; +use smol_str::SmolStr; use crate::auth::IpPattern; use crate::intern::{AccountIdInt, BranchIdInt, EndpointIdInt, ProjectIdInt, RoleNameInt}; @@ -239,6 +240,7 @@ pub(crate) struct GetEndpointAccessControl { #[derive(Debug, Deserialize)] pub(crate) struct WakeCompute { pub(crate) address: Box, + pub(crate) server_name: Option, pub(crate) aux: MetricsAuxInfo, } @@ -312,6 +314,9 @@ pub(crate) struct MetricsAuxInfo { pub(crate) endpoint_id: EndpointIdInt, pub(crate) project_id: ProjectIdInt, pub(crate) branch_id: BranchIdInt, + // note: we don't use interned strings for compute IDs. + // they churn too quickly and we have no way to clean up interned strings. + pub(crate) compute_id: SmolStr, #[serde(default)] pub(crate) cold_start_info: ColdStartInfo, } @@ -378,6 +383,7 @@ mod tests { "endpoint_id": "endpoint", "project_id": "project", "branch_id": "branch", + "compute_id": "compute", "cold_start_info": "unknown", }) } diff --git a/proxy/src/proxy/connect_compute.rs b/proxy/src/proxy/connect_compute.rs index b8b39fa121..e013fbbe2e 100644 --- a/proxy/src/proxy/connect_compute.rs +++ b/proxy/src/proxy/connect_compute.rs @@ -81,7 +81,10 @@ impl ConnectMechanism for TcpMechanism<'_> { type ConnectError = compute::ConnectionError; type Error = compute::ConnectionError; - #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] + #[tracing::instrument(skip_all, fields( + pid = tracing::field::Empty, + compute_id = tracing::field::Empty + ))] async fn connect_once( &self, ctx: &RequestContext, diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index 171f539b1e..e0b7539538 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -555,6 +555,7 @@ 
fn helper_create_cached_node_info(cache: &'static NodeInfoCache) -> CachedNodeIn endpoint_id: (&EndpointId::from("endpoint")).into(), project_id: (&ProjectId::from("project")).into(), branch_id: (&BranchId::from("branch")).into(), + compute_id: "compute".into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, }; diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index 72029102e0..b55661cec8 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -1,4 +1,5 @@ use std::io; +use std::net::{IpAddr, SocketAddr}; use std::sync::Arc; use std::time::Duration; @@ -6,11 +7,15 @@ use async_trait::async_trait; use ed25519_dalek::SigningKey; use hyper_util::rt::{TokioExecutor, TokioIo, TokioTimer}; use jose_jwk::jose_b64; +use postgres_client::config::SslMode; use rand::rngs::OsRng; +use rustls::pki_types::{DnsName, ServerName}; use tokio::net::{TcpStream, lookup_host}; +use tokio_rustls::TlsConnector; use tracing::field::display; use tracing::{debug, info}; +use super::AsyncRW; use super::conn_pool::poll_client; use super::conn_pool_lib::{Client, ConnInfo, EndpointConnPool, GlobalConnPool}; use super::http_conn_pool::{self, HttpConnPool, Send, poll_http2_client}; @@ -190,7 +195,11 @@ impl PoolingBackend { // Wake up the destination if needed. Code here is a bit involved because // we reuse the code from the usual proxy and we need to prepare few structures // that this code expects. 
- #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] + #[tracing::instrument(skip_all, fields( + pid = tracing::field::Empty, + compute_id = tracing::field::Empty, + conn_id = tracing::field::Empty, + ))] pub(crate) async fn connect_to_compute( &self, ctx: &RequestContext, @@ -229,7 +238,10 @@ impl PoolingBackend { } // Wake up the destination if needed - #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] + #[tracing::instrument(skip_all, fields( + compute_id = tracing::field::Empty, + conn_id = tracing::field::Empty, + ))] pub(crate) async fn connect_to_local_proxy( &self, ctx: &RequestContext, @@ -276,7 +288,10 @@ impl PoolingBackend { /// # Panics /// /// Panics if called with a non-local_proxy backend. - #[tracing::instrument(fields(pid = tracing::field::Empty), skip_all)] + #[tracing::instrument(skip_all, fields( + pid = tracing::field::Empty, + conn_id = tracing::field::Empty, + ))] pub(crate) async fn connect_to_local_postgres( &self, ctx: &RequestContext, @@ -552,6 +567,10 @@ impl ConnectMechanism for TokioMechanism { let (client, connection) = permit.release_result(res)?; tracing::Span::current().record("pid", tracing::field::display(client.get_process_id())); + tracing::Span::current().record( + "compute_id", + tracing::field::display(&node_info.aux.compute_id), + ); Ok(poll_client( self.pool.clone(), ctx, @@ -587,16 +606,28 @@ impl ConnectMechanism for HyperMechanism { node_info: &CachedNodeInfo, config: &ComputeConfig, ) -> Result { + let host_addr = node_info.config.get_host_addr(); let host = node_info.config.get_host(); let permit = self.locks.get_permit(&host).await?; let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Compute); + let tls = if node_info.config.get_ssl_mode() == SslMode::Disable { + None + } else { + Some(&config.tls) + }; + let port = node_info.config.get_port(); - let res = connect_http2(&host, port, config.timeout).await; + let res = connect_http2(host_addr, &host, port, 
config.timeout, tls).await; drop(pause); let (client, connection) = permit.release_result(res)?; + tracing::Span::current().record( + "compute_id", + tracing::field::display(&node_info.aux.compute_id), + ); + Ok(poll_http2_client( self.pool.clone(), ctx, @@ -612,18 +643,22 @@ impl ConnectMechanism for HyperMechanism { } async fn connect_http2( + host_addr: Option, host: &str, port: u16, timeout: Duration, + tls: Option<&Arc>, ) -> Result<(http_conn_pool::Send, http_conn_pool::Connect), LocalProxyConnError> { - // assumption: host is an ip address so this should not actually perform any requests. - // todo: add that assumption as a guarantee in the control-plane API. - let mut addrs = lookup_host((host, port)) - .await - .map_err(LocalProxyConnError::Io)?; - + let addrs = match host_addr { + Some(addr) => vec![SocketAddr::new(addr, port)], + None => lookup_host((host, port)) + .await + .map_err(LocalProxyConnError::Io)? + .collect(), + }; let mut last_err = None; + let mut addrs = addrs.into_iter(); let stream = loop { let Some(addr) = addrs.next() else { return Err(last_err.unwrap_or_else(|| { @@ -651,6 +686,20 @@ async fn connect_http2( } }; + let stream = if let Some(tls) = tls { + let host = DnsName::try_from(host) + .map_err(io::Error::other) + .map_err(LocalProxyConnError::Io)? 
+ .to_owned(); + let stream = TlsConnector::from(tls.clone()) + .connect(ServerName::DnsName(host), stream) + .await + .map_err(LocalProxyConnError::Io)?; + Box::pin(stream) as AsyncRW + } else { + Box::pin(stream) as AsyncRW + }; + let (client, connection) = hyper::client::conn::http2::Builder::new(TokioExecutor::new()) .timer(TokioTimer::new()) .keep_alive_interval(Duration::from_secs(20)) diff --git a/proxy/src/serverless/conn_pool.rs b/proxy/src/serverless/conn_pool.rs index 6a9089fc2a..516d474a11 100644 --- a/proxy/src/serverless/conn_pool.rs +++ b/proxy/src/serverless/conn_pool.rs @@ -221,6 +221,7 @@ mod tests { endpoint_id: (&EndpointId::from("endpoint")).into(), project_id: (&ProjectId::from("project")).into(), branch_id: (&BranchId::from("branch")).into(), + compute_id: "compute".into(), cold_start_info: crate::control_plane::messages::ColdStartInfo::Warm, }, conn_id: uuid::Uuid::new_v4(), diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index 338a79b4b3..bca2d4c165 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -6,9 +6,9 @@ use hyper::client::conn::http2; use hyper_util::rt::{TokioExecutor, TokioIo}; use parking_lot::RwLock; use smol_str::ToSmolStr; -use tokio::net::TcpStream; use tracing::{Instrument, debug, error, info, info_span}; +use super::AsyncRW; use super::backend::HttpConnError; use super::conn_pool_lib::{ ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, ConnPoolEntry, @@ -22,8 +22,7 @@ use crate::types::EndpointCacheKey; use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS}; pub(crate) type Send = http2::SendRequest; -pub(crate) type Connect = - http2::Connection, hyper::body::Incoming, TokioExecutor>; +pub(crate) type Connect = http2::Connection, hyper::body::Incoming, TokioExecutor>; #[derive(Clone)] pub(crate) struct ClientDataHttp(); diff --git a/proxy/src/tls/client_config.rs 
b/proxy/src/tls/client_config.rs index a2d695aae1..ce873e678e 100644 --- a/proxy/src/tls/client_config.rs +++ b/proxy/src/tls/client_config.rs @@ -1,17 +1,49 @@ +use std::env; +use std::io::Cursor; +use std::path::PathBuf; use std::sync::Arc; -use anyhow::bail; +use anyhow::{Context, bail}; use rustls::crypto::ring; -pub(crate) fn load_certs() -> anyhow::Result> { +/// We use an internal certificate authority when establishing a TLS connection with compute. +fn load_internal_certs(store: &mut rustls::RootCertStore) -> anyhow::Result<()> { + let Some(ca_file) = env::var_os("NEON_INTERNAL_CA_FILE") else { + return Ok(()); + }; + let ca_file = PathBuf::from(ca_file); + + let ca = std::fs::read(&ca_file) + .with_context(|| format!("could not read CA from {}", ca_file.display()))?; + + for cert in rustls_pemfile::certs(&mut Cursor::new(&*ca)) { + store + .add(cert.context("could not parse internal CA certificate")?) + .context("could not parse internal CA certificate")?; + } + + Ok(()) +} + +/// For console redirect proxy, we need to establish a connection to compute via pg-sni-router. +/// pg-sni-router needs TLS and uses a Let's Encrypt signed certificate, so we +/// load certificates from our native store. 
+fn load_native_certs(store: &mut rustls::RootCertStore) -> anyhow::Result<()> { let der_certs = rustls_native_certs::load_native_certs(); if !der_certs.errors.is_empty() { bail!("could not parse certificates: {:?}", der_certs.errors); } - let mut store = rustls::RootCertStore::empty(); store.add_parsable_certificates(der_certs.certs); + + Ok(()) +} + +fn load_compute_certs() -> anyhow::Result> { + let mut store = rustls::RootCertStore::empty(); + load_native_certs(&mut store)?; + load_internal_certs(&mut store)?; Ok(Arc::new(store)) } @@ -22,7 +54,7 @@ pub fn compute_client_config_with_root_certs() -> anyhow::Result Date: Fri, 28 Feb 2025 15:49:09 +0000 Subject: [PATCH 059/207] storcon: soft disable SK heartbeats (#11041) ## Problem JWT tokens aren't in place, so all SK heartbeats fail. This is equivalent to a wait before applying the PS heartbeats and makes things more flaky. ## Summary of Changes Add a flag that skips loading SKs from the db on start-up and at runtime. --- control_plane/src/local_env.rs | 3 +++ control_plane/src/storage_controller.rs | 4 ++++ storage_controller/src/main.rs | 5 +++++ storage_controller/src/service.rs | 26 ++++++++++++++++--------- test_runner/fixtures/neon_fixtures.py | 7 +++++++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index f4026efbbf..da7d7e5469 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -165,6 +165,8 @@ pub struct NeonStorageControllerConf { #[serde(with = "humantime_serde")] pub long_reconcile_threshold: Option, + + pub load_safekeepers: bool, } impl NeonStorageControllerConf { @@ -188,6 +190,7 @@ impl Default for NeonStorageControllerConf { max_secondary_lag_bytes: None, heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL, long_reconcile_threshold: None, + load_safekeepers: true, } } } diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 
16e12f4e02..77a9075aa7 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -537,6 +537,10 @@ impl StorageController { args.push("--start-as-candidate".to_string()); } + if self.config.load_safekeepers { + args.push("--load-safekeepers".to_string()); + } + if let Some(private_key) = &self.private_key { let claims = Claims::new(None, Scope::PageServerApi); let jwt_token = diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 04dd3bb3f6..380ffeb9b7 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -138,6 +138,10 @@ struct Cli { // Flag to use https for requests to pageserver API. #[arg(long, default_value = "false")] use_https_pageserver_api: bool, + + /// Whether to load safekeeprs from the database and heartbeat them + #[arg(long, default_value = "false")] + load_safekeepers: bool, } enum StrictMode { @@ -350,6 +354,7 @@ async fn async_main() -> anyhow::Result<()> { start_as_candidate: args.start_as_candidate, http_service_port: args.listen.port() as i32, use_https_pageserver_api: args.use_https_pageserver_api, + load_safekeepers: args.load_safekeepers, }; // Validate that we can connect to the database diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 9ba9504718..26ccfd5445 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -389,6 +389,8 @@ pub struct Config { pub long_reconcile_threshold: Duration, pub use_https_pageserver_api: bool, + + pub load_safekeepers: bool, } impl From for ApiError { @@ -1405,15 +1407,20 @@ impl Service { .set(nodes.len() as i64); tracing::info!("Loading safekeepers from database..."); - let safekeepers = persistence - .list_safekeepers() - .await? - .into_iter() - .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new())) - .collect::>(); + let safekeepers = if config.load_safekeepers { + persistence + .list_safekeepers() + .await? 
+ .into_iter() + .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new())) + .collect::>() + } else { + tracing::info!("Skipping safekeeper loading"); + Default::default() + }; + let safekeepers: HashMap = safekeepers.into_iter().map(|n| (n.get_id(), n)).collect(); - tracing::info!("Loaded {} safekeepers from database.", safekeepers.len()); tracing::info!("Loading shards from database..."); let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?; @@ -8054,7 +8061,8 @@ impl Service { ) -> Result<(), DatabaseError> { let node_id = NodeId(record.id as u64); self.persistence.safekeeper_upsert(record.clone()).await?; - { + + if self.config.load_safekeepers { let mut locked = self.inner.write().unwrap(); let mut safekeepers = (*locked.safekeepers).clone(); match safekeepers.entry(node_id) { @@ -8086,7 +8094,7 @@ impl Service { .await?; let node_id = NodeId(id as u64); // After the change has been persisted successfully, update the in-memory state - { + if self.config.load_safekeepers { let mut locked = self.inner.write().unwrap(); let mut safekeepers = (*locked.safekeepers).clone(); let sk = safekeepers diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 11fbe30767..6001003e53 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1128,6 +1128,13 @@ class NeonEnv: if self.storage_controller_config is not None: cfg["storage_controller"] = self.storage_controller_config + # Disable new storcon flag in compat tests + if config.test_may_use_compatibility_snapshot_binaries: + if "storage_controller" in cfg: + cfg["storage_controller"]["load_safekeepers"] = False + else: + cfg["storage_controller"] = {"load_safekeepers": False} + # Create config for pageserver http_auth_type = "NeonJWT" if config.auth_enabled else "Trust" pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust" From f79ee0bb8840be01693c1344b26cc1134e5d82a1 Mon Sep 17 00:00:00 
2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Fri, 28 Feb 2025 10:49:15 -0500 Subject: [PATCH 060/207] fix(storcon): loop in chaos injection (#11004) ## Problem Somehow the previous patch loses the loop in the chaos injector function so everything will only run once. https://github.com/neondatabase/neon/pull/10934 ## Summary of changes Add back the loop. Signed-off-by: Alex Chi Z --- .../src/service/chaos_injector.rs | 71 ++++++++++--------- 1 file changed, 37 insertions(+), 34 deletions(-) diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs index 2ff68d7037..a0419e0205 100644 --- a/storage_controller/src/service/chaos_injector.rs +++ b/storage_controller/src/service/chaos_injector.rs @@ -46,48 +46,51 @@ impl ChaosInjector { } } + fn get_cron_interval_sleep_future(&self) -> Option { + if let Some(ref chaos_exit_crontab) = self.chaos_exit_crontab { + match cron_to_next_duration(chaos_exit_crontab) { + Ok(interval_exit) => Some(interval_exit), + Err(e) => { + tracing::error!("Error processing the cron schedule: {e}"); + None + } + } + } else { + None + } + } + pub async fn run(&mut self, cancel: CancellationToken) { let mut interval = tokio::time::interval(self.interval); - let cron_interval = { - if let Some(ref chaos_exit_crontab) = self.chaos_exit_crontab { - match cron_to_next_duration(chaos_exit_crontab) { - Ok(interval_exit) => Some(interval_exit), - Err(e) => { - tracing::error!("Error processing the cron schedule: {e}"); - None - } - } - } else { - None - } - }; + #[derive(Debug)] enum ChaosEvent { ShuffleTenant, ForceKill, } - let chaos_type = tokio::select! 
{ - _ = interval.tick() => { - ChaosEvent::ShuffleTenant - } - Some(_) = maybe_sleep(cron_interval) => { - ChaosEvent::ForceKill - } - _ = cancel.cancelled() => { - tracing::info!("Shutting down"); - return; - } - }; - - match chaos_type { - ChaosEvent::ShuffleTenant => { - self.inject_chaos().await; - } - ChaosEvent::ForceKill => { - self.force_kill().await; + loop { + let cron_interval = self.get_cron_interval_sleep_future(); + let chaos_type = tokio::select! { + _ = interval.tick() => { + ChaosEvent::ShuffleTenant + } + Some(_) = maybe_sleep(cron_interval) => { + ChaosEvent::ForceKill + } + _ = cancel.cancelled() => { + tracing::info!("Shutting down"); + return; + } + }; + tracing::info!("Chaos iteration: {chaos_type:?}..."); + match chaos_type { + ChaosEvent::ShuffleTenant => { + self.inject_chaos().await; + } + ChaosEvent::ForceKill => { + self.force_kill().await; + } } } - - tracing::info!("Chaos iteration..."); } /// If a shard has a secondary and attached location, then re-assign the secondary to be From d857f63e3b1be9b1b70ef3f8c64cb088d126743c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 28 Feb 2025 18:00:22 +0100 Subject: [PATCH 061/207] pageserver: fix race that can wedge background tasks (#11047) ## Problem `wait_for_active_tenant()`, used when starting background tasks, has a race condition that can cause it to wait forever (until cancelled). It first checks the current tenant state, and then subscribes for state updates, but if the state changes between these then it won't be notified about it. We've seen this wedge compaction tasks, which can cause unbounded layer file buildup and read amplification. ## Summary of changes Use `watch::Receiver::wait_for()` to check both the current and new tenant states. 
--- pageserver/src/tenant/tasks.rs | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/pageserver/src/tenant/tasks.rs b/pageserver/src/tenant/tasks.rs index c90f81889b..589ac5ae88 100644 --- a/pageserver/src/tenant/tasks.rs +++ b/pageserver/src/tenant/tasks.rs @@ -473,21 +473,15 @@ async fn wait_for_active_tenant( } let mut update_rx = tenant.subscribe_for_state_updates(); - loop { - tokio::select! { - _ = cancel.cancelled() => return ControlFlow::Break(()), - result = update_rx.changed() => if result.is_err() { + tokio::select! { + result = update_rx.wait_for(|s| s == &TenantState::Active) => { + if result.is_err() { return ControlFlow::Break(()); } - } - - match &*update_rx.borrow() { - TenantState::Active => { - debug!("Tenant state changed to active, continuing the task loop"); - return ControlFlow::Continue(()); - } - state => debug!("Not running the task loop, tenant is not active: {state:?}"), - } + debug!("Tenant state changed to active, continuing the task loop"); + ControlFlow::Continue(()) + }, + _ = cancel.cancelled() => ControlFlow::Break(()), } } From 56033189c10dc93fa0098f5ae77a951b481edc15 Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Fri, 28 Feb 2025 19:58:42 +0200 Subject: [PATCH 062/207] feat(proxy): Log latency after connect to compute (#11048) ## Problem To measure latency accurately, we should associate the testodrome role with the latency data. ## Summary of changes Add latency logging so that different roles can be associated with their latency. Relates to #22486 --- proxy/src/compute.rs | 5 +++-- proxy/src/context/mod.rs | 11 ++++++++++- proxy/src/metrics.rs | 28 +++++++++++++++++++++------- 3 files changed, 34 insertions(+), 10 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 2560187608..dfa6015b10 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -287,8 +287,9 @@ impl ConnCfg { // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) 
info!( cold_start_info = ctx.cold_start_info().as_str(), - "connected to compute node at {host} ({socket_addr}) sslmode={:?}", - self.0.get_ssl_mode() + "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}", + self.0.get_ssl_mode(), + ctx.get_proxy_latency(), ); // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index f87f4e9ef8..e10a04b4f1 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -17,7 +17,8 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::error::ErrorKind; use crate::intern::{BranchIdInt, ProjectIdInt}; use crate::metrics::{ - ConnectOutcome, InvalidEndpointsGroup, LatencyTimer, Metrics, Protocol, Waiting, + ConnectOutcome, InvalidEndpointsGroup, LatencyAccumulated, LatencyTimer, Metrics, Protocol, + Waiting, }; use crate::protocol2::{ConnectionInfo, ConnectionInfoExtra}; use crate::types::{DbName, EndpointId, RoleName}; @@ -346,6 +347,14 @@ impl RequestContext { } } + pub(crate) fn get_proxy_latency(&self) -> LatencyAccumulated { + self.0 + .try_lock() + .expect("should not deadlock") + .latency_timer + .accumulated() + } + pub(crate) fn success(&self) { self.0 .try_lock() diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index db1f096de1..b6a2a059ea 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -394,21 +394,31 @@ pub enum RedisMsgKind { HDel, } -#[derive(Default)] -struct Accumulated { +#[derive(Default, Clone)] +pub struct LatencyAccumulated { cplane: time::Duration, client: time::Duration, compute: time::Duration, retry: time::Duration, } +impl std::fmt::Display for LatencyAccumulated { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "client: {:?}, cplane: {:?}, compute: {:?}, retry: {:?}", + self.client, self.cplane, self.compute, self.retry + ) + } +} + pub struct LatencyTimer { // time since the stopwatch was 
started start: time::Instant, // time since the stopwatch was stopped stop: Option, // accumulated time on the stopwatch - accumulated: Accumulated, + accumulated: LatencyAccumulated, // label data protocol: Protocol, cold_start_info: ColdStartInfo, @@ -422,7 +432,7 @@ impl LatencyTimer { Self { start: time::Instant::now(), stop: None, - accumulated: Accumulated::default(), + accumulated: LatencyAccumulated::default(), protocol, cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified @@ -435,7 +445,7 @@ impl LatencyTimer { Self { start: time::Instant::now(), stop: None, - accumulated: Accumulated::default(), + accumulated: LatencyAccumulated::default(), protocol, cold_start_info: ColdStartInfo::Unknown, // assume failed unless otherwise specified @@ -465,6 +475,10 @@ impl LatencyTimer { // success self.outcome = ConnectOutcome::Success; } + + pub fn accumulated(&self) -> LatencyAccumulated { + self.accumulated.clone() + } } #[derive(FixedCardinalityLabel, Clone, Copy, Debug)] @@ -511,7 +525,7 @@ impl Drop for LatencyTimer { duration.saturating_sub(accumulated_total).as_secs_f64(), ); - // Exclude client cplane, compue communication from the accumulated time. + // Exclude client, cplane, compute communication from the accumulated time. let accumulated_total = self.accumulated.client + self.accumulated.cplane + self.accumulated.compute; metric.observe( @@ -524,7 +538,7 @@ impl Drop for LatencyTimer { duration.saturating_sub(accumulated_total).as_secs_f64(), ); - // Exclude client cplane, compue, retry communication from the accumulated time. + // Exclude client, cplane, compute, retry communication from the accumulated time. 
let accumulated_total = self.accumulated.client + self.accumulated.cplane + self.accumulated.compute From ee0c8ca8fd6a9abd5ad7bd18a8a240286f47e2f6 Mon Sep 17 00:00:00 2001 From: Suhas Thalanki <54014218+thesuhas@users.noreply.github.com> Date: Fri, 28 Feb 2025 16:07:21 -0500 Subject: [PATCH 063/207] Add -fsigned-char for cross platform signed chars (#10852) ## Problem In multi-character keys, the GIN index creates a CRC Hash of the first 3 bytes of the key. The hash may have its first bit set or unset, so a consistent representation of `char` across architectures is needed for consistent results. GIN stores these keys by their hashes, which determines the order in which the keys are obtained from the GIN index. By default, chars are signed on x86 and unsigned on arm, leading to inconsistent behavior across different platform architectures. Adding the `-fsigned-char` flag to the GCC compiler forces chars to be treated as signed across platforms, ensuring that the order in which the keys are obtained is consistent. ## Summary of changes Added `-fsigned-char` to the `CFLAGS` to force GCC to use signed chars across platforms. Added a test to check this across platforms. 
Fixes: https://github.com/neondatabase/cloud/issues/23199 --- Makefile | 7 +- compute/compute-node.Dockerfile | 2 +- test_runner/regress/data/test_signed_char.out | 1 + test_runner/regress/test_signed_char.py | 64 +++++++++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 test_runner/regress/data/test_signed_char.out create mode 100644 test_runner/regress/test_signed_char.py diff --git a/Makefile b/Makefile index 42ee643bb5..0911465fb8 100644 --- a/Makefile +++ b/Makefile @@ -11,15 +11,16 @@ ICU_PREFIX_DIR := /usr/local/icu # BUILD_TYPE ?= debug WITH_SANITIZERS ?= no +PG_CFLAGS = -fsigned-char ifeq ($(BUILD_TYPE),release) PG_CONFIGURE_OPTS = --enable-debug --with-openssl - PG_CFLAGS = -O2 -g3 $(CFLAGS) + PG_CFLAGS += -O2 -g3 $(CFLAGS) PG_LDFLAGS = $(LDFLAGS) # Unfortunately, `--profile=...` is a nightly feature CARGO_BUILD_FLAGS += --release else ifeq ($(BUILD_TYPE),debug) PG_CONFIGURE_OPTS = --enable-debug --with-openssl --enable-cassert --enable-depend - PG_CFLAGS = -O0 -g3 $(CFLAGS) + PG_CFLAGS += -O0 -g3 $(CFLAGS) PG_LDFLAGS = $(LDFLAGS) else $(error Bad build type '$(BUILD_TYPE)', see Makefile for options) @@ -159,6 +160,8 @@ postgres-%: postgres-configure-% \ $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_visibility install +@echo "Compiling pageinspect $*" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pageinspect install + +@echo "Compiling pg_trgm $*" + $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/pg_trgm install +@echo "Compiling amcheck $*" $(MAKE) -C $(POSTGRES_INSTALL_DIR)/build/$*/contrib/amcheck install +@echo "Compiling test_decoding $*" diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 0cdb44853f..c3aecfbdc5 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -162,7 +162,7 @@ FROM build-deps AS pg-build ARG PG_VERSION COPY vendor/postgres-${PG_VERSION:?} postgres RUN cd postgres && \ - export CONFIGURE_CMD="./configure CFLAGS='-O2 
-g3' --enable-debug --with-openssl --with-uuid=ossp \ + export CONFIGURE_CMD="./configure CFLAGS='-O2 -g3 -fsigned-char' --enable-debug --with-openssl --with-uuid=ossp \ --with-icu --with-libxml --with-libxslt --with-lz4" && \ if [ "${PG_VERSION:?}" != "v14" ]; then \ # zstd is available only from PG15 diff --git a/test_runner/regress/data/test_signed_char.out b/test_runner/regress/data/test_signed_char.out new file mode 100644 index 0000000000..a68876e383 --- /dev/null +++ b/test_runner/regress/data/test_signed_char.out @@ -0,0 +1 @@ +0000000094010815f81f042000000000b89f8000909f5000689f5000489f4000309f3000189f3000009f3000e89e3000d09e3000b89e3000a09e3000889e3000709e3000309e8000189e3000009e3000e89d3000d09d3000b89d3000a09d3000889d3000709d3000589d3000409d3000289d3000109d3000f89c3000e09c3000c89c3000b09c3000989c3000809c3000689c3000509c3000389c3000209c3000089c3000f09b3000d89b3000c09b3000a89b3000909b3000789b3000609b3000489b3000309b3000189b3000009b3000e89a3000d09a3000b89a3000a09a3000889a3000489a8000309a3000189a3000009a3000e8993000d0993000b8993000a09930008899300070993000589930004099300000998000e8983000d0983000b8983000a0983000889830007098300058983000409830002898300010983000f8973000b8978000a09730008897300070973000589730004097300028973000e8968000a89680006896800028968000e8958000a89580009095300050958000389530002095300008953000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000801000010018004c198900000000000000000029000000008010000100180049787f000000000000000000290000000080100001001800727c70000000000000000000290000000080100028004000207662000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000010018002072620000000000000000002900000000801000280040007662390000000000000000000100270001010101010101010101010101010101010101010101010101010101010101010101010101010100008010002800400062393800000000000000000001002700010101010101010101010101010101010101010101010101010101010101010101010101010101000080100028004000393837000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000
80100028004000383736000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000280040003736350000000000000000000100270001010101010101010101010101010101010101010101010101010101010101010101010101010100008010002800400036353400000000000000000001002700010101010101010101010101010101010101010101010101010101010101010101010101010101000080100001001800203034000000000000000000280000000080100001001800203933000000000000000000270000000080100001001800203833000000000000000000260000000080100001001800203733000000000000000000250000000080100001001800203633000000000000000000240000000080100001001800203533000000000000000000230000000080100028004000353433000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000010018002034330000000000000000002200000000801000010018002033330000000000000000002100000000801000010018002032330000000000000000002000000000801000010018002031330000000000000000001f00000000801000010018002030330000000000000000001e00000000801000010018002039320000000000000000001d00000000801000010018002038320000000000000000001c00000000801000010018002037320000000000000000001b00000000801000010018002036320000000000000000001a0000000080100001001800203532000000000000000000190000000080100001001800203432000000000000000000180000000080100028004000343332000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000010018002033320000000000000000001700000000801000010018002032320000000000000000001600000000801000010018002031320000000000000000001500000000801000010018002030320000000000000000001400000000801000010018002039310000000000000000001300000000801000010018002038310000000000000000001200000000801000010018002037310000000000000000001100000000801000010018002036310000000000000000001000000000801000010018002035310000000000000000000f00000000801000010018002034310000000000000000000e00000000801000010018002033310000000000000000000d000000008010002800400033
3231000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000010018002032310000000000000000000c00000000801000010018002031310000000000000000000b00000000801000010018002030310000000000000000000a00000000801000010018002039200000000000000000000900000000801000010018002038200000000000000000000800000000801000010018002037200000000000000000000700000000801000010018002036200000000000000000000600000000801000010018002035200000000000000000000500000000801000010018003034200000000000000000002800000000801000010018002034200000000000000000000400000000801000010018003933200000000000000000002700000000801000010018003833200000000000000000002600000000801000010018003733200000000000000000002500000000801000010018003633200000000000000000002400000000801000010018003533200000000000000000002300000000801000010018003433200000000000000000002200000000801000010018003333200000000000000000002100000000801000010018003233200000000000000000002000000000801000010018003133200000000000000000001f00000000801000010018003033200000000000000000001e00000000801000010018002033200000000000000000000300000000801000010018003932200000000000000000001d00000000801000010018003832200000000000000000001c00000000801000010018003732200000000000000000001b00000000801000010018003632200000000000000000001a00000000801000010018003532200000000000000000001900000000801000010018003432200000000000000000001800000000801000010018003332200000000000000000001700000000801000010018003232200000000000000000001600000000801000010018003132200000000000000000001500000000801000010018003032200000000000000000001400000000801000010018002032200000000000000000000200000000801000010018003931200000000000000000001300000000801000010018003831200000000000000000001200000000801000010018003731200000000000000000001100000000801000010018003631200000000000000000001000000000801000010018003531200000000000000000000f00000000801000010018003431200000000000000000000e00000000801000010018003331200000000000000000000d000000008010002800400032
3120000000000000000000010027000101010101010101010101010101010101010101010101010101010101010101010101010101010000801000010018003131200000000000000000000b00000000801000010018003031200000000000000000000a0000000080100001001800203120000000000000000000010000000080100001001800622020000000000000000000290000000080100001001800392020000000000000000000090000000080100001001800382020000000000000000000080000000080100001001800372020000000000000000000070000000080100001001800362020000000000000000000060000000080100001001800352020000000000000000000050000000080100002002000342020000000000000000000040001002400000000000000008010000b00280033202000000000000000000003000a001b010101010101010101000000000000008010000b00280032202000000000000000000002000a001201010101010101010100000000000000801000280040003120200000000000000000000100270001010101010101010101010101010101010101010101010101010101010101010101010101010100ffffffff00000200 \ No newline at end of file diff --git a/test_runner/regress/test_signed_char.py b/test_runner/regress/test_signed_char.py new file mode 100644 index 0000000000..8752a1ff3f --- /dev/null +++ b/test_runner/regress/test_signed_char.py @@ -0,0 +1,64 @@ +from pathlib import Path + +from fixtures.neon_fixtures import NeonEnv + +SIGNED_CHAR_EXTRACT = """ + WITH + -- Generates an intermediate table with block numbers of the index + pagenumbers AS ( + SELECT num FROM generate_series(0, (pg_relation_size('test_payload_idx') / 8192) - 1) it(num) + ) + SELECT num, + -- Gets the data of the page, skipping the first 8 bytes which is the LSN + substr(page, 9, 8192-8), + -- Returns information about the GIN index opaque area + (gin_page_opaque_info(page)).* + FROM pagenumbers, + -- Gets a page from the respective blocks of the table + LATERAL (SELECT get_raw_page('test_payload_idx', num)) AS p(page) + -- Filters to only return leaf pages from the GIN Index + WHERE ARRAY['leaf'] = ((gin_page_opaque_info(page)).flags); + """ + + +def test_signed_char(neon_simple_env: NeonEnv): + """ + 
Test that postgres was compiled with -fsigned-char. + --- + In multi-character keys, the GIN index creates a CRC Hash of the first 3 bytes of the key. + The hash can have the first bit to be set or unset, needing to have a consistent representation + of char across architectures for consistent results. GIN stores these keys by their hashes + which determines the order in which the keys are obtained from the GIN index. + Using -fsigned-char enforces this order across platforms making this consistent. + The following query gets all the data present in the leaf page of a GIN index, + which is ordered by the CRC hash and is consistent across platforms. + """ + env = neon_simple_env + endpoint = env.endpoints.create_start("main") + + with endpoint.connect().cursor() as ses1: + # Add the required extensions + ses1.execute("CREATE EXTENSION pg_trgm;") + ses1.execute("CREATE EXTENSION pageinspect;") + # Create a test table + ses1.execute("CREATE TABLE test (payload text);") + # Create a GIN based index + ses1.execute( + "CREATE INDEX test_payload_idx ON test USING gin (payload gin_trgm_ops) WITH (gin_pending_list_limit = 64);" + ) + # insert a multibyte character to trigger order-dependent hashing + ses1.execute( + "INSERT INTO test SELECT '123456789BV' || CHR(127153) /* ace of spades, a multibyte character */ || i::text from generate_series(1, 40) as i(i);" + ) + ses1.execute("INSERT INTO test SELECT 'Bóbr';") + # Clean pending list to flush data to pages + ses1.execute("select gin_clean_pending_list('test_payload_idx'::regclass);") + ses1.execute(SIGNED_CHAR_EXTRACT) + pages = ses1.fetchall() + # Compare expected output + page1 = pages[0] + data = bytes(page1[1]).hex() + with open(Path(__file__).parent / "data" / "test_signed_char.out", encoding="utf-8") as f: + expected = f.read().rstrip() + + assert data == expected From 066324d6ec07e499256a431abcd8a87078d7f7cb Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Sat, 1 Mar 2025 00:48:05 +0200 Subject: [PATCH 
064/207] compute_ctl: Rearrange startup code (#11007) Move most of the code to compute.rs, so that all the major startup steps are visible in one place. You can now get a pretty good picture of what happens in the latency-critical path at compute startup by reading ComputeNode::start_compute(). This also clarifies the error handling in start_compute. Previously, the start_postgres function sometimes returned an Err, and sometimes Ok but with the compute status already set to Failed. Now the start_compute function always returns Err on failure, and it's the caller's responsibility to change the compute status to Failed. Separately from that, it returns a handle to the Postgres process via a `&mut` reference if it had already started Postgres (i.e. on success, or if the failure happens after launching the Postgres process). --------- Co-authored-by: Alexey Kondratov --- compute_tools/src/bin/compute_ctl.rs | 465 +---------- compute_tools/src/catalog.rs | 6 +- compute_tools/src/compute.rs | 770 +++++++++++++----- compute_tools/src/http/routes/configure.rs | 2 +- .../src/http/routes/extension_server.rs | 10 +- compute_tools/src/logger.rs | 49 ++ compute_tools/src/monitor.rs | 2 +- 7 files changed, 667 insertions(+), 637 deletions(-) diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 6dae1a2753..08966a6efb 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -33,39 +33,27 @@ //! -b /usr/local/bin/postgres \ //! -r http://pg-ext-s3-gateway \ //! 
``` -use std::collections::HashMap; use std::ffi::OsString; use std::fs::File; use std::path::Path; use std::process::exit; -use std::str::FromStr; -use std::sync::atomic::Ordering; -use std::sync::{Arc, Condvar, Mutex, RwLock, mpsc}; +use std::sync::mpsc; use std::thread; use std::time::Duration; use anyhow::{Context, Result}; -use chrono::Utc; use clap::Parser; -use compute_api::responses::{ComputeCtlConfig, ComputeStatus}; +use compute_api::responses::ComputeCtlConfig; use compute_api::spec::ComputeSpec; -use compute_tools::compute::{ - ComputeNode, ComputeState, PG_PID, ParsedSpec, forward_termination_signal, -}; -use compute_tools::configurator::launch_configurator; -use compute_tools::disk_quota::set_disk_quota; +use compute_tools::compute::{ComputeNode, ComputeNodeParams, forward_termination_signal}; use compute_tools::extension_server::get_pg_version_string; -use compute_tools::http::server::Server; use compute_tools::logger::*; -use compute_tools::lsn_lease::launch_lsn_lease_bg_task_for_static; -use compute_tools::monitor::launch_monitor; use compute_tools::params::*; use compute_tools::spec::*; -use compute_tools::swap::resize_swap; use rlimit::{Resource, setrlimit}; use signal_hook::consts::{SIGINT, SIGQUIT, SIGTERM}; use signal_hook::iterator::Signals; -use tracing::{error, info, warn}; +use tracing::{error, info}; use url::Url; use utils::failpoint_support; @@ -164,29 +152,40 @@ fn main() -> Result<()> { // enable core dumping for all child processes setrlimit(Resource::CORE, rlimit::INFINITY, rlimit::INFINITY)?; - let (pg_handle, start_pg_result) = { - // Enter startup tracing context - let _startup_context_guard = startup_context_from_env(); + let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?; - let cli_spec = try_spec_from_cli(&cli)?; + let cli_spec = try_spec_from_cli(&cli)?; - let compute = wait_spec(build_tag, &cli, cli_spec)?; + let compute_node = ComputeNode::new( + ComputeNodeParams { + compute_id: 
cli.compute_id, + connstr, + pgdata: cli.pgdata.clone(), + pgbin: cli.pgbin.clone(), + pgversion: get_pg_version_string(&cli.pgbin), + external_http_port: cli.external_http_port, + internal_http_port: cli.internal_http_port, + ext_remote_storage: cli.remote_ext_config.clone(), + resize_swap_on_bind: cli.resize_swap_on_bind, + set_disk_quota_for_fs: cli.set_disk_quota_for_fs, + #[cfg(target_os = "linux")] + filecache_connstr: cli.filecache_connstr, + #[cfg(target_os = "linux")] + cgroup: cli.cgroup, + #[cfg(target_os = "linux")] + vm_monitor_addr: cli.vm_monitor_addr, + build_tag, - start_postgres(&cli, compute)? + live_config_allowed: cli_spec.live_config_allowed, + }, + cli_spec.spec, + )?; - // Startup is finished, exit the startup tracing span - }; - - // PostgreSQL is now running, if startup was successful. Wait until it exits. - let wait_pg_result = wait_postgres(pg_handle)?; - - let delay_exit = cleanup_after_postgres_exit(start_pg_result)?; - - maybe_delay_exit(delay_exit); + let exit_code = compute_node.run()?; scenario.teardown(); - deinit_and_exit(wait_pg_result); + deinit_and_exit(exit_code); } async fn init() -> Result { @@ -207,56 +206,6 @@ async fn init() -> Result { Ok(build_tag) } -fn startup_context_from_env() -> Option { - // Extract OpenTelemetry context for the startup actions from the - // TRACEPARENT and TRACESTATE env variables, and attach it to the current - // tracing context. - // - // This is used to propagate the context for the 'start_compute' operation - // from the neon control plane. This allows linking together the wider - // 'start_compute' operation that creates the compute container, with the - // startup actions here within the container. - // - // There is no standard for passing context in env variables, but a lot of - // tools use TRACEPARENT/TRACESTATE, so we use that convention too. 
See - // https://github.com/open-telemetry/opentelemetry-specification/issues/740 - // - // Switch to the startup context here, and exit it once the startup has - // completed and Postgres is up and running. - // - // If this pod is pre-created without binding it to any particular endpoint - // yet, this isn't the right place to enter the startup context. In that - // case, the control plane should pass the tracing context as part of the - // /configure API call. - // - // NOTE: This is supposed to only cover the *startup* actions. Once - // postgres is configured and up-and-running, we exit this span. Any other - // actions that are performed on incoming HTTP requests, for example, are - // performed in separate spans. - // - // XXX: If the pod is restarted, we perform the startup actions in the same - // context as the original startup actions, which probably doesn't make - // sense. - let mut startup_tracing_carrier: HashMap = HashMap::new(); - if let Ok(val) = std::env::var("TRACEPARENT") { - startup_tracing_carrier.insert("traceparent".to_string(), val); - } - if let Ok(val) = std::env::var("TRACESTATE") { - startup_tracing_carrier.insert("tracestate".to_string(), val); - } - if !startup_tracing_carrier.is_empty() { - use opentelemetry::propagation::TextMapPropagator; - use opentelemetry_sdk::propagation::TraceContextPropagator; - let guard = TraceContextPropagator::new() - .extract(&startup_tracing_carrier) - .attach(); - info!("startup tracing context attached"); - Some(guard) - } else { - None - } -} - fn try_spec_from_cli(cli: &Cli) -> Result { // First, try to get cluster spec from the cli argument if let Some(ref spec_json) = cli.spec_json { @@ -307,357 +256,7 @@ struct CliSpecParams { live_config_allowed: bool, } -fn wait_spec( - build_tag: String, - cli: &Cli, - CliSpecParams { - spec, - live_config_allowed, - compute_ctl_config: _, - }: CliSpecParams, -) -> Result> { - let mut new_state = ComputeState::new(); - let spec_set; - - if let Some(spec) = 
spec { - let pspec = ParsedSpec::try_from(spec).map_err(|msg| anyhow::anyhow!(msg))?; - info!("new pspec.spec: {:?}", pspec.spec); - new_state.pspec = Some(pspec); - spec_set = true; - } else { - spec_set = false; - } - let connstr = Url::parse(&cli.connstr).context("cannot parse connstr as a URL")?; - let conn_conf = postgres::config::Config::from_str(connstr.as_str()) - .context("cannot build postgres config from connstr")?; - let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr.as_str()) - .context("cannot build tokio postgres config from connstr")?; - let compute_node = ComputeNode { - compute_id: cli.compute_id.clone(), - connstr, - conn_conf, - tokio_conn_conf, - pgdata: cli.pgdata.clone(), - pgbin: cli.pgbin.clone(), - pgversion: get_pg_version_string(&cli.pgbin), - external_http_port: cli.external_http_port, - internal_http_port: cli.internal_http_port, - live_config_allowed, - state: Mutex::new(new_state), - state_changed: Condvar::new(), - ext_remote_storage: cli.remote_ext_config.clone(), - ext_download_progress: RwLock::new(HashMap::new()), - build_tag, - }; - let compute = Arc::new(compute_node); - - // If this is a pooled VM, prewarm before starting HTTP server and becoming - // available for binding. Prewarming helps Postgres start quicker later, - // because QEMU will already have its memory allocated from the host, and - // the necessary binaries will already be cached. - if !spec_set { - compute.prewarm_postgres()?; - } - - // Launch the external HTTP server first, so that we can serve control plane - // requests while configuration is still in progress. - Server::External(cli.external_http_port).launch(&compute); - - // The internal HTTP server could be launched later, but there isn't much - // sense in waiting. - Server::Internal(cli.internal_http_port).launch(&compute); - - if !spec_set { - // No spec provided, hang waiting for it. 
- info!("no compute spec provided, waiting"); - - let mut state = compute.state.lock().unwrap(); - while state.status != ComputeStatus::ConfigurationPending { - state = compute.state_changed.wait(state).unwrap(); - - if state.status == ComputeStatus::ConfigurationPending { - info!("got spec, continue configuration"); - // Spec is already set by the http server handler. - break; - } - } - - // Record for how long we slept waiting for the spec. - let now = Utc::now(); - state.metrics.wait_for_spec_ms = now - .signed_duration_since(state.start_time) - .to_std() - .unwrap() - .as_millis() as u64; - - // Reset start time, so that the total startup time that is calculated later will - // not include the time that we waited for the spec. - state.start_time = now; - } - - launch_lsn_lease_bg_task_for_static(&compute); - - Ok(compute) -} - -fn start_postgres( - cli: &Cli, - compute: Arc, -) -> Result<(Option, StartPostgresResult)> { - // We got all we need, update the state. - let mut state = compute.state.lock().unwrap(); - - // Create a tracing span for the startup operation. - // - // We could otherwise just annotate the function with #[instrument], but if - // we're being configured from a /configure HTTP request, we want the - // startup to be considered part of the /configure request. - let _this_entered = { - // Temporarily enter the /configure request's span, so that the new span - // becomes its child. - let _parent_entered = state.startup_span.take().map(|p| p.entered()); - - tracing::info_span!("start_postgres") - } - .entered(); - - state.set_status(ComputeStatus::Init, &compute.state_changed); - - info!( - "running compute with features: {:?}", - state.pspec.as_ref().unwrap().spec.features - ); - // before we release the mutex, fetch some parameters for later. - let &ComputeSpec { - swap_size_bytes, - disk_quota_bytes, - #[cfg(target_os = "linux")] - disable_lfc_resizing, - .. 
- } = &state.pspec.as_ref().unwrap().spec; - drop(state); - - // Launch remaining service threads - let _monitor_handle = launch_monitor(&compute); - let _configurator_handle = launch_configurator(&compute); - - let mut prestartup_failed = false; - let mut delay_exit = false; - - // Resize swap to the desired size if the compute spec says so - if let (Some(size_bytes), true) = (swap_size_bytes, cli.resize_swap_on_bind) { - // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion - // *before* starting postgres. - // - // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this - // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets - // OOM-killed during startup because swap wasn't available yet. - match resize_swap(size_bytes) { - Ok(()) => { - let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display. - info!(%size_bytes, %size_mib, "resized swap"); - } - Err(err) => { - let err = err.context("failed to resize swap"); - error!("{err:#}"); - - // Mark compute startup as failed; don't try to start postgres, and report this - // error to the control plane when it next asks. - prestartup_failed = true; - compute.set_failed_status(err); - delay_exit = true; - } - } - } - - // Set disk quota if the compute spec says so - if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) = - (disk_quota_bytes, cli.set_disk_quota_for_fs.as_ref()) - { - match set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) { - Ok(()) => { - let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. - info!(%disk_quota_bytes, %size_mib, "set disk quota"); - } - Err(err) => { - let err = err.context("failed to set disk quota"); - error!("{err:#}"); - - // Mark compute startup as failed; don't try to start postgres, and report this - // error to the control plane when it next asks. 
- prestartup_failed = true; - compute.set_failed_status(err); - delay_exit = true; - } - } - } - - // Start Postgres - let mut pg = None; - if !prestartup_failed { - pg = match compute.start_compute() { - Ok(pg) => { - info!(postmaster_pid = %pg.0.id(), "Postgres was started"); - Some(pg) - } - Err(err) => { - error!("could not start the compute node: {:#}", err); - compute.set_failed_status(err); - delay_exit = true; - None - } - }; - } else { - warn!("skipping postgres startup because pre-startup step failed"); - } - - // Start the vm-monitor if directed to. The vm-monitor only runs on linux - // because it requires cgroups. - cfg_if::cfg_if! { - if #[cfg(target_os = "linux")] { - use std::env; - use tokio_util::sync::CancellationToken; - - // This token is used internally by the monitor to clean up all threads - let token = CancellationToken::new(); - - // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC - let pgconnstr = if disable_lfc_resizing.unwrap_or(false) { - None - } else { - Some(cli.filecache_connstr.clone()) - }; - - let vm_monitor = if env::var_os("AUTOSCALING").is_some() { - let vm_monitor = tokio::spawn(vm_monitor::start( - Box::leak(Box::new(vm_monitor::Args { - cgroup: Some(cli.cgroup.clone()), - pgconnstr, - addr: cli.vm_monitor_addr.clone(), - })), - token.clone(), - )); - Some(vm_monitor) - } else { - None - }; - } - } - - Ok(( - pg, - StartPostgresResult { - delay_exit, - compute, - #[cfg(target_os = "linux")] - token, - #[cfg(target_os = "linux")] - vm_monitor, - }, - )) -} - -type PostgresHandle = (std::process::Child, tokio::task::JoinHandle>); - -struct StartPostgresResult { - delay_exit: bool, - // passed through from WaitSpecResult - compute: Arc, - - #[cfg(target_os = "linux")] - token: tokio_util::sync::CancellationToken, - #[cfg(target_os = "linux")] - vm_monitor: Option>>, -} - -fn wait_postgres(pg: Option) -> Result { - // Wait for the child Postgres process forever. 
In this state Ctrl+C will - // propagate to Postgres and it will be shut down as well. - let mut exit_code = None; - if let Some((mut pg, logs_handle)) = pg { - info!(postmaster_pid = %pg.id(), "Waiting for Postgres to exit"); - - let ecode = pg - .wait() - .expect("failed to start waiting on Postgres process"); - PG_PID.store(0, Ordering::SeqCst); - - // Process has exited. Wait for the log collecting task to finish. - let _ = tokio::runtime::Handle::current() - .block_on(logs_handle) - .map_err(|e| tracing::error!("log task panicked: {:?}", e)); - - info!("Postgres exited with code {}, shutting down", ecode); - exit_code = ecode.code() - } - - Ok(WaitPostgresResult { exit_code }) -} - -struct WaitPostgresResult { - exit_code: Option, -} - -fn cleanup_after_postgres_exit( - StartPostgresResult { - mut delay_exit, - compute, - #[cfg(target_os = "linux")] - vm_monitor, - #[cfg(target_os = "linux")] - token, - }: StartPostgresResult, -) -> Result { - // Terminate the vm_monitor so it releases the file watcher on - // /sys/fs/cgroup/neon-postgres. - // Note: the vm-monitor only runs on linux because it requires cgroups. - cfg_if::cfg_if! 
{ - if #[cfg(target_os = "linux")] { - if let Some(handle) = vm_monitor { - // Kills all threads spawned by the monitor - token.cancel(); - // Kills the actual task running the monitor - handle.abort(); - } - } - } - - // Maybe sync safekeepers again, to speed up next startup - let compute_state = compute.state.lock().unwrap().clone(); - let pspec = compute_state.pspec.as_ref().expect("spec must be set"); - if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) { - info!("syncing safekeepers on shutdown"); - let storage_auth_token = pspec.storage_auth_token.clone(); - let lsn = compute.sync_safekeepers(storage_auth_token)?; - info!("synced safekeepers at lsn {lsn}"); - } - - let mut state = compute.state.lock().unwrap(); - if state.status == ComputeStatus::TerminationPending { - state.status = ComputeStatus::Terminated; - compute.state_changed.notify_all(); - // we were asked to terminate gracefully, don't exit to avoid restart - delay_exit = true - } - drop(state); - - if let Err(err) = compute.check_for_core_dumps() { - error!("error while checking for core dumps: {err:?}"); - } - - Ok(delay_exit) -} - -fn maybe_delay_exit(delay_exit: bool) { - // If launch failed, keep serving HTTP requests for a while, so the cloud - // control plane can get the actual error. - if delay_exit { - info!("giving control plane 30s to collect the error before shutdown"); - thread::sleep(Duration::from_secs(30)); - } -} - -fn deinit_and_exit(WaitPostgresResult { exit_code }: WaitPostgresResult) -> ! { +fn deinit_and_exit(exit_code: Option) -> ! { // Shutdown trace pipeline gracefully, so that it has a chance to send any // pending traces before we exit. 
Shutting down OTEL tracing provider may // hang for quite some time, see, for example: diff --git a/compute_tools/src/catalog.rs b/compute_tools/src/catalog.rs index 2a7f56e6fc..db3e07e086 100644 --- a/compute_tools/src/catalog.rs +++ b/compute_tools/src/catalog.rs @@ -58,14 +58,14 @@ pub async fn get_database_schema( compute: &Arc, dbname: &str, ) -> Result> + use<>, SchemaDumpError> { - let pgbin = &compute.pgbin; + let pgbin = &compute.params.pgbin; let basepath = Path::new(pgbin).parent().unwrap(); let pgdump = basepath.join("pg_dump"); // Replace the DB in the connection string and disable it to parts. // This is the only option to handle DBs with special characters. - let conf = - postgres_conf_for_db(&compute.connstr, dbname).map_err(|_| SchemaDumpError::Unexpected)?; + let conf = postgres_conf_for_db(&compute.params.connstr, dbname) + .map_err(|_| SchemaDumpError::Unexpected)?; let host = conf .get_hosts() .first() diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index c0e28790d6..9e065e84a4 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -28,28 +28,53 @@ use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; use utils::measured_stream::MeasuredReader; +use crate::configurator::launch_configurator; +use crate::disk_quota::set_disk_quota; use crate::installed_extensions::get_installed_extensions; +use crate::logger::startup_context_from_env; +use crate::lsn_lease::launch_lsn_lease_bg_task_for_static; +use crate::monitor::launch_monitor; use crate::pg_helpers::*; use crate::spec::*; +use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; use crate::{config, extension_server, local_proxy}; pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0); pub static PG_PID: AtomicU32 = AtomicU32::new(0); -/// Compute node info shared across several `compute_ctl` threads. -pub struct ComputeNode { +/// Static configuration params that don't change after startup. 
These mostly +/// come from the CLI args, or are derived from them. +pub struct ComputeNodeParams { /// The ID of the compute pub compute_id: String, // Url type maintains proper escaping pub connstr: url::Url, - // We connect to Postgres from many different places, so build configs once - // and reuse them where needed. - pub conn_conf: postgres::config::Config, - pub tokio_conn_conf: tokio_postgres::config::Config, + + pub resize_swap_on_bind: bool, + pub set_disk_quota_for_fs: Option, + + // VM monitor parameters + #[cfg(target_os = "linux")] + pub filecache_connstr: String, + #[cfg(target_os = "linux")] + pub cgroup: String, + #[cfg(target_os = "linux")] + pub vm_monitor_addr: String, + pub pgdata: String, pub pgbin: String, pub pgversion: String, + pub build_tag: String, + + /// The port that the compute's external HTTP server listens on + pub external_http_port: u16, + /// The port that the compute's internal HTTP server listens on + pub internal_http_port: u16, + + /// the address of extension storage proxy gateway + pub ext_remote_storage: Option, + /// We should only allow live re- / configuration of the compute node if /// it uses 'pull model', i.e. it can go to control-plane and fetch /// the latest configuration. Otherwise, there could be a case: @@ -63,10 +88,17 @@ pub struct ComputeNode { /// - we push spec and it does configuration /// - but then it is restarted without any spec again pub live_config_allowed: bool, - /// The port that the compute's external HTTP server listens on - pub external_http_port: u16, - /// The port that the compute's internal HTTP server listens on - pub internal_http_port: u16, +} + +/// Compute node info shared across several `compute_ctl` threads. +pub struct ComputeNode { + pub params: ComputeNodeParams, + + // We connect to Postgres from many different places, so build configs once + // and reuse them where needed. 
These are derived from 'params.connstr' + pub conn_conf: postgres::config::Config, + pub tokio_conn_conf: tokio_postgres::config::Config, + /// Volatile part of the `ComputeNode`, which should be used under `Mutex`. /// To allow HTTP API server to serving status requests, while configuration /// is in progress, lock should be held only for short periods of time to do @@ -74,11 +106,9 @@ pub struct ComputeNode { pub state: Mutex, /// `Condvar` to allow notifying waiters about state changes. pub state_changed: Condvar, - /// the address of extension storage proxy gateway - pub ext_remote_storage: Option, + // key: ext_archive_name, value: started download time, download_completed? pub ext_download_progress: RwLock, bool)>>, - pub build_tag: String, } // store some metrics about download size that might impact startup time @@ -242,6 +272,25 @@ fn maybe_cgexec(cmd: &str) -> Command { } } +struct PostgresHandle { + postgres: std::process::Child, + log_collector: tokio::task::JoinHandle>, +} + +impl PostgresHandle { + /// Return PID of the postgres (postmaster) process + fn pid(&self) -> Pid { + Pid::from_raw(self.postgres.id() as i32) + } +} + +struct StartVmMonitorResult { + #[cfg(target_os = "linux")] + token: tokio_util::sync::CancellationToken, + #[cfg(target_os = "linux")] + vm_monitor: Option>>, +} + pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { let roles = spec .cluster @@ -316,6 +365,421 @@ pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { } impl ComputeNode { + pub fn new(params: ComputeNodeParams, cli_spec: Option) -> Result { + let connstr = params.connstr.as_str(); + let conn_conf = postgres::config::Config::from_str(connstr) + .context("cannot build postgres config from connstr")?; + let tokio_conn_conf = tokio_postgres::config::Config::from_str(connstr) + .context("cannot build tokio postgres config from connstr")?; + + let mut new_state = ComputeState::new(); + if let Some(cli_spec) = cli_spec { + let pspec 
= ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?; + new_state.pspec = Some(pspec); + } + + Ok(ComputeNode { + params, + conn_conf, + tokio_conn_conf, + state: Mutex::new(new_state), + state_changed: Condvar::new(), + ext_download_progress: RwLock::new(HashMap::new()), + }) + } + + /// Top-level control flow of compute_ctl. Returns a process exit code we should + /// exit with. + pub fn run(self) -> Result> { + let this = Arc::new(self); + + let cli_spec = this.state.lock().unwrap().pspec.clone(); + + // If this is a pooled VM, prewarm before starting HTTP server and becoming + // available for binding. Prewarming helps Postgres start quicker later, + // because QEMU will already have its memory allocated from the host, and + // the necessary binaries will already be cached. + if cli_spec.is_none() { + this.prewarm_postgres()?; + } + + // Launch the external HTTP server first, so that we can serve control plane + // requests while configuration is still in progress. + crate::http::server::Server::External(this.params.external_http_port).launch(&this); + + // The internal HTTP server could be launched later, but there isn't much + // sense in waiting. + crate::http::server::Server::Internal(this.params.internal_http_port).launch(&this); + + // If we got a spec from the CLI already, use that. Otherwise wait for the + // control plane to pass it to us with a /configure HTTP request + let pspec = if let Some(cli_spec) = cli_spec { + cli_spec + } else { + this.wait_spec()? + }; + + launch_lsn_lease_bg_task_for_static(&this); + + // We have a spec, start the compute + let mut delay_exit = false; + let mut vm_monitor = None; + let mut pg_process: Option = None; + + match this.start_compute(&mut pg_process) { + Ok(()) => { + // Success! Launch remaining services (just vm-monitor currently) + vm_monitor = + Some(this.start_vm_monitor(pspec.spec.disable_lfc_resizing.unwrap_or(false))); + } + Err(err) => { + // Something went wrong with the startup. 
Log it and expose the error to + // HTTP status requests. + error!("could not start the compute node: {:#}", err); + this.set_failed_status(err); + delay_exit = true; + + // If the error happened after starting PostgreSQL, kill it + if let Some(ref pg_process) = pg_process { + kill(pg_process.pid(), Signal::SIGQUIT).ok(); + } + } + } + + // If startup was successful, or it failed in the late stages, + // PostgreSQL is now running. Wait until it exits. + let exit_code = if let Some(pg_handle) = pg_process { + let exit_status = this.wait_postgres(pg_handle); + info!("Postgres exited with code {}, shutting down", exit_status); + exit_status.code() + } else { + None + }; + + // Terminate the vm_monitor so it releases the file watcher on + // /sys/fs/cgroup/neon-postgres. + // Note: the vm-monitor only runs on linux because it requires cgroups. + if let Some(vm_monitor) = vm_monitor { + cfg_if::cfg_if! { + if #[cfg(target_os = "linux")] { + // Kills all threads spawned by the monitor + vm_monitor.token.cancel(); + if let Some(handle) = vm_monitor.vm_monitor { + // Kills the actual task running the monitor + handle.abort(); + } + } + } + } + + // Reap the postgres process + delay_exit |= this.cleanup_after_postgres_exit()?; + + // If launch failed, keep serving HTTP requests for a while, so the cloud + // control plane can get the actual error. + if delay_exit { + info!("giving control plane 30s to collect the error before shutdown"); + std::thread::sleep(Duration::from_secs(30)); + } + Ok(exit_code) + } + + pub fn wait_spec(&self) -> Result { + info!("no compute spec provided, waiting"); + let mut state = self.state.lock().unwrap(); + while state.status != ComputeStatus::ConfigurationPending { + state = self.state_changed.wait(state).unwrap(); + } + + info!("got spec, continue configuration"); + let spec = state.pspec.as_ref().unwrap().clone(); + + // Record for how long we slept waiting for the spec. 
+ let now = Utc::now(); + state.metrics.wait_for_spec_ms = now + .signed_duration_since(state.start_time) + .to_std() + .unwrap() + .as_millis() as u64; + + // Reset start time, so that the total startup time that is calculated later will + // not include the time that we waited for the spec. + state.start_time = now; + + Ok(spec) + } + + /// Start compute. + /// + /// Prerequisites: + /// - the compute spec has been placed in self.state.pspec + /// + /// On success: + /// - status is set to ComputeStatus::Running + /// - self.running_postgres is set + /// + /// On error: + /// - status is left in ComputeStatus::Init. The caller is responsible for setting it to Failed + /// - if Postgres was started before the fatal error happened, self.running_postgres is + /// set. The caller is responsible for killing it. + fn start_compute(self: &Arc, pg_handle: &mut Option) -> Result<()> { + let compute_state: ComputeState; + + let _this_entered; + { + let mut state_guard = self.state.lock().unwrap(); + + // Create a tracing span for the startup operation. + // + // We could otherwise just annotate the function with #[instrument], but if + // we're being configured from a /configure HTTP request, we want the + // startup to be considered part of the /configure request. + // + // Similarly, if a trace ID was passed in env variables, attach it to the span. + _this_entered = { + // Temporarily enter the parent span, so that the new span becomes its child. 
+ if let Some(p) = state_guard.startup_span.take() { + let _parent_entered = p.entered(); + tracing::info_span!("start_compute") + } else if let Some(otel_context) = startup_context_from_env() { + use tracing_opentelemetry::OpenTelemetrySpanExt; + let span = tracing::info_span!("start_compute"); + span.set_parent(otel_context); + span + } else { + tracing::info_span!("start_compute") + } + } + .entered(); + + state_guard.set_status(ComputeStatus::Init, &self.state_changed); + compute_state = state_guard.clone() + } + + let pspec = compute_state.pspec.as_ref().expect("spec must be set"); + info!( + "starting compute for project {}, operation {}, tenant {}, timeline {}, features {:?}, spec.remote_extensions {:?}", + pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None"), + pspec.spec.operation_uuid.as_deref().unwrap_or("None"), + pspec.tenant_id, + pspec.timeline_id, + pspec.spec.features, + pspec.spec.remote_extensions, + ); + + // Launch remaining service threads + let _monitor_handle = launch_monitor(self); + let _configurator_handle = launch_configurator(self); + + // Resize swap to the desired size if the compute spec says so + if let (Some(size_bytes), true) = + (pspec.spec.swap_size_bytes, self.params.resize_swap_on_bind) + { + // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion + // *before* starting postgres. + // + // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this + // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets + // OOM-killed during startup because swap wasn't available yet. + resize_swap(size_bytes).context("failed to resize swap")?; + let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display. 
+ info!(%size_bytes, %size_mib, "resized swap"); + } + + // Set disk quota if the compute spec says so + if let (Some(disk_quota_bytes), Some(disk_quota_fs_mountpoint)) = ( + pspec.spec.disk_quota_bytes, + self.params.set_disk_quota_for_fs.as_ref(), + ) { + set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) + .context("failed to set disk quota")?; + let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. + info!(%disk_quota_bytes, %size_mib, "set disk quota"); + } + + // tune pgbouncer + if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings { + info!("tuning pgbouncer"); + + // Spawn a background task to do the tuning, + // so that we don't block the main thread that starts Postgres. + let pgbouncer_settings = pgbouncer_settings.clone(); + let _handle = tokio::spawn(async move { + let res = tune_pgbouncer(pgbouncer_settings).await; + if let Err(err) = res { + error!("error while tuning pgbouncer: {err:?}"); + // Continue with the startup anyway + } + }); + } + + // configure local_proxy + if let Some(local_proxy) = &pspec.spec.local_proxy_config { + info!("configuring local_proxy"); + + // Spawn a background task to do the configuration, + // so that we don't block the main thread that starts Postgres. 
+ let local_proxy = local_proxy.clone(); + let _handle = tokio::spawn(async move { + if let Err(err) = local_proxy::configure(&local_proxy) { + error!("error while configuring local_proxy: {err:?}"); + // Continue with the startup anyway + } + }); + } + + // This part is sync, because we need to download + // remote shared_preload_libraries before postgres start (if any) + if let Some(remote_extensions) = &pspec.spec.remote_extensions { + // First, create control files for all availale extensions + extension_server::create_control_files(remote_extensions, &self.params.pgbin); + + let library_load_start_time = Utc::now(); + let rt = tokio::runtime::Handle::current(); + let remote_ext_metrics = rt.block_on(self.prepare_preload_libraries(&pspec.spec))?; + + let library_load_time = Utc::now() + .signed_duration_since(library_load_start_time) + .to_std() + .unwrap() + .as_millis() as u64; + let mut state = self.state.lock().unwrap(); + state.metrics.load_ext_ms = library_load_time; + state.metrics.num_ext_downloaded = remote_ext_metrics.num_ext_downloaded; + state.metrics.largest_ext_size = remote_ext_metrics.largest_ext_size; + state.metrics.total_ext_download_size = remote_ext_metrics.total_ext_download_size; + info!( + "Loading shared_preload_libraries took {:?}ms", + library_load_time + ); + info!("{:?}", remote_ext_metrics); + } + + // Prepre pgdata directory. This downloads the basebackup, among other things. + self.prepare_pgdata(&compute_state)?; + + // Start Postgres + let start_time = Utc::now(); + let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?; + let postmaster_pid = pg_process.pid(); + *pg_handle = Some(pg_process); + + // If this is a primary endpoint, perform some post-startup configuration before + // opening it up for the world. 
+ let config_time = Utc::now(); + if pspec.spec.mode == ComputeMode::Primary { + self.configure_as_primary(&compute_state)?; + let conf = self.get_conn_conf(None); + tokio::task::spawn_blocking(|| { + let res = get_installed_extensions(conf); + match res { + Ok(extensions) => { + info!( + "[NEON_EXT_STAT] {}", + serde_json::to_string(&extensions) + .expect("failed to serialize extensions list") + ); + } + Err(err) => error!("could not get installed extensions: {err:?}"), + } + }); + } + + // All done! + let startup_end_time = Utc::now(); + let metrics = { + let mut state = self.state.lock().unwrap(); + state.metrics.start_postgres_ms = config_time + .signed_duration_since(start_time) + .to_std() + .unwrap() + .as_millis() as u64; + state.metrics.config_ms = startup_end_time + .signed_duration_since(config_time) + .to_std() + .unwrap() + .as_millis() as u64; + state.metrics.total_startup_ms = startup_end_time + .signed_duration_since(compute_state.start_time) + .to_std() + .unwrap() + .as_millis() as u64; + state.metrics.clone() + }; + self.set_status(ComputeStatus::Running); + + // Log metrics so that we can search for slow operations in logs + info!(?metrics, postmaster_pid = %postmaster_pid, "compute start finished"); + + Ok(()) + } + + /// Start the vm-monitor if directed to. The vm-monitor only runs on linux + /// because it requires cgroups. + fn start_vm_monitor(&self, disable_lfc_resizing: bool) -> StartVmMonitorResult { + cfg_if::cfg_if! 
{ + if #[cfg(target_os = "linux")] { + use std::env; + use tokio_util::sync::CancellationToken; + + // This token is used internally by the monitor to clean up all threads + let token = CancellationToken::new(); + + // don't pass postgres connection string to vm-monitor if we don't want it to resize LFC + let pgconnstr = if disable_lfc_resizing { + None + } else { + Some(self.params.filecache_connstr.clone()) + }; + + let vm_monitor = if env::var_os("AUTOSCALING").is_some() { + let vm_monitor = tokio::spawn(vm_monitor::start( + Box::leak(Box::new(vm_monitor::Args { + cgroup: Some(self.params.cgroup.clone()), + pgconnstr, + addr: self.params.vm_monitor_addr.clone(), + })), + token.clone(), + )); + Some(vm_monitor) + } else { + None + }; + StartVmMonitorResult { token, vm_monitor } + } else { + StartVmMonitorResult { } + } + } + } + + fn cleanup_after_postgres_exit(&self) -> Result { + // Maybe sync safekeepers again, to speed up next startup + let compute_state = self.state.lock().unwrap().clone(); + let pspec = compute_state.pspec.as_ref().expect("spec must be set"); + if matches!(pspec.spec.mode, compute_api::spec::ComputeMode::Primary) { + info!("syncing safekeepers on shutdown"); + let storage_auth_token = pspec.storage_auth_token.clone(); + let lsn = self.sync_safekeepers(storage_auth_token)?; + info!("synced safekeepers at lsn {lsn}"); + } + + let mut delay_exit = false; + let mut state = self.state.lock().unwrap(); + if state.status == ComputeStatus::TerminationPending { + state.status = ComputeStatus::Terminated; + self.state_changed.notify_all(); + // we were asked to terminate gracefully, don't exit to avoid restart + delay_exit = true + } + drop(state); + + if let Err(err) = self.check_for_core_dumps() { + error!("error while checking for core dumps: {err:?}"); + } + + Ok(delay_exit) + } + /// Check that compute node has corresponding feature enabled. 
pub fn has_feature(&self, feature: ComputeFeature) -> bool { let state = self.state.lock().unwrap(); @@ -354,9 +818,10 @@ impl ComputeNode { fn create_pgdata(&self) -> Result<()> { // Ignore removal error, likely it is a 'No such file or directory (os error 2)'. // If it is something different then create_dir() will error out anyway. - let _ok = fs::remove_dir_all(&self.pgdata); - fs::create_dir(&self.pgdata)?; - fs::set_permissions(&self.pgdata, fs::Permissions::from_mode(0o700))?; + let pgdata = &self.params.pgdata; + let _ok = fs::remove_dir_all(pgdata); + fs::create_dir(pgdata)?; + fs::set_permissions(pgdata, fs::Permissions::from_mode(0o700))?; Ok(()) } @@ -421,7 +886,7 @@ impl ComputeNode { // sends an Error after finishing the tarball, we will not notice it. let mut ar = tar::Archive::new(flate2::read::GzDecoder::new(&mut bufreader)); ar.set_ignore_zeros(true); - ar.unpack(&self.pgdata)?; + ar.unpack(&self.params.pgdata)?; // Report metrics let mut state = self.state.lock().unwrap(); @@ -566,9 +1031,9 @@ impl ComputeNode { pub fn sync_safekeepers(&self, storage_auth_token: Option) -> Result { let start_time = Utc::now(); - let mut sync_handle = maybe_cgexec(&self.pgbin) + let mut sync_handle = maybe_cgexec(&self.params.pgbin) .args(["--sync-safekeepers"]) - .env("PGDATA", &self.pgdata) // we cannot use -D in this mode + .env("PGDATA", &self.params.pgdata) // we cannot use -D in this mode .envs(if let Some(storage_auth_token) = &storage_auth_token { vec![("NEON_AUTH_TOKEN", storage_auth_token)] } else { @@ -625,14 +1090,14 @@ impl ComputeNode { pub fn prepare_pgdata(&self, compute_state: &ComputeState) -> Result<()> { let pspec = compute_state.pspec.as_ref().expect("spec must be set"); let spec = &pspec.spec; - let pgdata_path = Path::new(&self.pgdata); + let pgdata_path = Path::new(&self.params.pgdata); // Remove/create an empty pgdata directory and put configuration there. 
self.create_pgdata()?; config::write_postgres_conf( &pgdata_path.join("postgresql.conf"), &pspec.spec, - self.internal_http_port, + self.params.internal_http_port, )?; // Syncing safekeepers is only safe with primary nodes: if a primary @@ -732,12 +1197,15 @@ impl ComputeNode { info!("prewarming"); // Create pgdata - let pgdata = &format!("{}.warmup", self.pgdata); + let pgdata = &format!("{}.warmup", self.params.pgdata); create_pgdata(pgdata)?; // Run initdb to completion info!("running initdb"); - let initdb_bin = Path::new(&self.pgbin).parent().unwrap().join("initdb"); + let initdb_bin = Path::new(&self.params.pgbin) + .parent() + .unwrap() + .join("initdb"); Command::new(initdb_bin) .args(["--pgdata", pgdata]) .output() @@ -753,7 +1221,7 @@ impl ComputeNode { // Start postgres info!("starting postgres"); - let mut pg = maybe_cgexec(&self.pgbin) + let mut pg = maybe_cgexec(&self.params.pgbin) .args(["-D", pgdata]) .spawn() .expect("cannot start postgres process"); @@ -780,15 +1248,12 @@ impl ComputeNode { /// /// Returns a handle to the child process and a handle to the logs thread. #[instrument(skip_all)] - pub fn start_postgres( - &self, - storage_auth_token: Option, - ) -> Result<(std::process::Child, tokio::task::JoinHandle>)> { - let pgdata_path = Path::new(&self.pgdata); + pub fn start_postgres(&self, storage_auth_token: Option) -> Result { + let pgdata_path = Path::new(&self.params.pgdata); // Run postgres as a child process. - let mut pg = maybe_cgexec(&self.pgbin) - .args(["-D", &self.pgdata]) + let mut pg = maybe_cgexec(&self.params.pgbin) + .args(["-D", &self.params.pgdata]) .envs(if let Some(storage_auth_token) = &storage_auth_token { vec![("NEON_AUTH_TOKEN", storage_auth_token)] } else { @@ -805,7 +1270,29 @@ impl ComputeNode { wait_for_postgres(&mut pg, pgdata_path)?; - Ok((pg, logs_handle)) + Ok(PostgresHandle { + postgres: pg, + log_collector: logs_handle, + }) + } + + /// Wait for the child Postgres process forever. 
In this state Ctrl+C will + /// propagate to Postgres and it will be shut down as well. + fn wait_postgres(&self, mut pg_handle: PostgresHandle) -> std::process::ExitStatus { + info!(postmaster_pid = %pg_handle.postgres.id(), "Waiting for Postgres to exit"); + + let ecode = pg_handle + .postgres + .wait() + .expect("failed to start waiting on Postgres process"); + PG_PID.store(0, Ordering::SeqCst); + + // Process has exited. Wait for the log collecting task to finish. + let _ = tokio::runtime::Handle::current() + .block_on(pg_handle.log_collector) + .map_err(|e| tracing::error!("log task panicked: {:?}", e)); + + ecode } /// Do post configuration of the already started Postgres. This function spawns a background task to @@ -972,9 +1459,12 @@ impl ComputeNode { // `pg_ctl` for start / stop. #[instrument(skip_all)] fn pg_reload_conf(&self) -> Result<()> { - let pgctl_bin = Path::new(&self.pgbin).parent().unwrap().join("pg_ctl"); + let pgctl_bin = Path::new(&self.params.pgbin) + .parent() + .unwrap() + .join("pg_ctl"); Command::new(pgctl_bin) - .args(["reload", "-D", &self.pgdata]) + .args(["reload", "-D", &self.params.pgdata]) .output() .expect("cannot run pg_ctl process"); Ok(()) @@ -1014,9 +1504,9 @@ impl ComputeNode { } // Write new config - let pgdata_path = Path::new(&self.pgdata); + let pgdata_path = Path::new(&self.params.pgdata); let postgresql_conf_path = pgdata_path.join("postgresql.conf"); - config::write_postgres_conf(&postgresql_conf_path, &spec, self.internal_http_port)?; + config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?; if !spec.skip_pg_catalog_updates { let max_concurrent_connections = spec.reconfigure_concurrency; @@ -1027,7 +1517,8 @@ impl ComputeNode { self.pg_reload_conf()?; if spec.mode == ComputeMode::Primary { - let mut conf = tokio_postgres::Config::from_str(self.connstr.as_str()).unwrap(); + let mut conf = + tokio_postgres::Config::from_str(self.params.connstr.as_str()).unwrap(); 
conf.application_name("apply_config"); let conf = Arc::new(conf); @@ -1053,166 +1544,52 @@ impl ComputeNode { } #[instrument(skip_all)] - pub fn start_compute( - &self, - ) -> Result<(std::process::Child, tokio::task::JoinHandle>)> { - let compute_state = self.state.lock().unwrap().clone(); + pub fn configure_as_primary(&self, compute_state: &ComputeState) -> Result<()> { let pspec = compute_state.pspec.as_ref().expect("spec must be set"); - info!( - "starting compute for project {}, operation {}, tenant {}, timeline {}", - pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None"), - pspec.spec.operation_uuid.as_deref().unwrap_or("None"), - pspec.tenant_id, - pspec.timeline_id, - ); - // tune pgbouncer - if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings { - info!("tuning pgbouncer"); + assert!(pspec.spec.mode == ComputeMode::Primary); + if !pspec.spec.skip_pg_catalog_updates { + let pgdata_path = Path::new(&self.params.pgdata); + // temporarily reset max_cluster_size in config + // to avoid the possibility of hitting the limit, while we are applying config: + // creating new extensions, roles, etc... + config::with_compute_ctl_tmp_override(pgdata_path, "neon.max_cluster_size=-1", || { + self.pg_reload_conf()?; - // Spawn a background task to do the tuning, - // so that we don't block the main thread that starts Postgres. - let pgbouncer_settings = pgbouncer_settings.clone(); - let _handle = tokio::spawn(async move { - let res = tune_pgbouncer(pgbouncer_settings).await; - if let Err(err) = res { - error!("error while tuning pgbouncer: {err:?}"); - } - }); + self.apply_config(compute_state)?; + + Ok(()) + })?; + + let postgresql_conf_path = pgdata_path.join("postgresql.conf"); + if config::line_in_file( + &postgresql_conf_path, + "neon.disable_logical_replication_subscribers=false", + )? 
{ + info!( + "updated postgresql.conf to set neon.disable_logical_replication_subscribers=false" + ); + } + self.pg_reload_conf()?; } + self.post_apply_config()?; - if let Some(local_proxy) = &pspec.spec.local_proxy_config { - info!("configuring local_proxy"); - - // Spawn a background task to do the configuration, - // so that we don't block the main thread that starts Postgres. - let local_proxy = local_proxy.clone(); - let _handle = tokio::spawn(async move { - if let Err(err) = local_proxy::configure(&local_proxy) { - error!("error while configuring local_proxy: {err:?}"); - } - }); - } - - info!( - "start_compute spec.remote_extensions {:?}", - pspec.spec.remote_extensions - ); - - // This part is sync, because we need to download - // remote shared_preload_libraries before postgres start (if any) - if let Some(remote_extensions) = &pspec.spec.remote_extensions { - // First, create control files for all availale extensions - extension_server::create_control_files(remote_extensions, &self.pgbin); - - let library_load_start_time = Utc::now(); - let rt = tokio::runtime::Handle::current(); - let remote_ext_metrics = rt.block_on(self.prepare_preload_libraries(&pspec.spec))?; - - let library_load_time = Utc::now() - .signed_duration_since(library_load_start_time) - .to_std() - .unwrap() - .as_millis() as u64; - let mut state = self.state.lock().unwrap(); - state.metrics.load_ext_ms = library_load_time; - state.metrics.num_ext_downloaded = remote_ext_metrics.num_ext_downloaded; - state.metrics.largest_ext_size = remote_ext_metrics.largest_ext_size; - state.metrics.total_ext_download_size = remote_ext_metrics.total_ext_download_size; - info!( - "Loading shared_preload_libraries took {:?}ms", - library_load_time - ); - info!("{:?}", remote_ext_metrics); - } - - self.prepare_pgdata(&compute_state)?; - - let start_time = Utc::now(); - let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?; - - let config_time = Utc::now(); - if pspec.spec.mode == 
ComputeMode::Primary { - if !pspec.spec.skip_pg_catalog_updates { - let pgdata_path = Path::new(&self.pgdata); - // temporarily reset max_cluster_size in config - // to avoid the possibility of hitting the limit, while we are applying config: - // creating new extensions, roles, etc... - config::with_compute_ctl_tmp_override( - pgdata_path, - "neon.max_cluster_size=-1", - || { - self.pg_reload_conf()?; - - self.apply_config(&compute_state)?; - - Ok(()) - }, - )?; - - let postgresql_conf_path = pgdata_path.join("postgresql.conf"); - if config::line_in_file( - &postgresql_conf_path, - "neon.disable_logical_replication_subscribers=false", - )? { + let conf = self.get_conn_conf(None); + tokio::task::spawn_blocking(|| { + let res = get_installed_extensions(conf); + match res { + Ok(extensions) => { info!( - "updated postgresql.conf to set neon.disable_logical_replication_subscribers=false" + "[NEON_EXT_STAT] {}", + serde_json::to_string(&extensions) + .expect("failed to serialize extensions list") ); } - self.pg_reload_conf()?; + Err(err) => error!("could not get installed extensions: {err:?}"), } - self.post_apply_config()?; + }); - let conf = self.get_conn_conf(None); - tokio::task::spawn_blocking(|| { - let res = get_installed_extensions(conf); - match res { - Ok(extensions) => { - info!( - "[NEON_EXT_STAT] {}", - serde_json::to_string(&extensions) - .expect("failed to serialize extensions list") - ); - } - Err(err) => error!("could not get installed extensions: {err:?}"), - } - }); - } - - let startup_end_time = Utc::now(); - { - let mut state = self.state.lock().unwrap(); - state.metrics.start_postgres_ms = config_time - .signed_duration_since(start_time) - .to_std() - .unwrap() - .as_millis() as u64; - state.metrics.config_ms = startup_end_time - .signed_duration_since(config_time) - .to_std() - .unwrap() - .as_millis() as u64; - state.metrics.total_startup_ms = startup_end_time - .signed_duration_since(compute_state.start_time) - .to_std() - .unwrap() - 
.as_millis() as u64; - } - self.set_status(ComputeStatus::Running); - - info!( - "finished configuration of compute for project {}", - pspec.spec.cluster.cluster_id.as_deref().unwrap_or("None") - ); - - // Log metrics so that we can search for slow operations in logs - let metrics = { - let state = self.state.lock().unwrap(); - state.metrics.clone() - }; - info!(?metrics, "compute start finished"); - - Ok(pg_process) + Ok(()) } /// Update the `last_active` in the shared state, but ensure that it's a more recent one. @@ -1241,7 +1618,7 @@ impl ComputeNode { pub fn check_for_core_dumps(&self) -> Result<()> { let core_dump_dir = match std::env::consts::OS { "macos" => Path::new("/cores/"), - _ => Path::new(&self.pgdata), + _ => Path::new(&self.params.pgdata), }; // Collect core dump paths if any @@ -1271,7 +1648,7 @@ impl ComputeNode { // Try first with gdb let backtrace = Command::new("gdb") - .args(["--batch", "-q", "-ex", "bt", &self.pgbin]) + .args(["--batch", "-q", "-ex", "bt", &self.params.pgbin]) .arg(&core_path) .output(); @@ -1348,7 +1725,8 @@ LIMIT 100", ext_path: RemotePath, ) -> Result { let ext_remote_storage = - self.ext_remote_storage + self.params + .ext_remote_storage .as_ref() .ok_or(DownloadError::BadInput(anyhow::anyhow!( "Remote extensions storage is not configured", @@ -1411,7 +1789,7 @@ LIMIT 100", &real_ext_name, &ext_path, ext_remote_storage, - &self.pgbin, + &self.params.pgbin, ) .await .map_err(DownloadError::Other); @@ -1519,7 +1897,7 @@ LIMIT 100", &self, spec: &ComputeSpec, ) -> Result { - if self.ext_remote_storage.is_none() { + if self.params.ext_remote_storage.is_none() { return Ok(RemoteExtensionMetrics { num_ext_downloaded: 0, largest_ext_size: 0, @@ -1570,8 +1948,12 @@ LIMIT 100", let mut download_tasks = Vec::new(); for library in &libs_vec { - let (ext_name, ext_path) = - remote_extensions.get_ext(library, true, &self.build_tag, &self.pgversion)?; + let (ext_name, ext_path) = remote_extensions.get_ext( + library, + true, + 
&self.params.build_tag, + &self.params.pgversion, + )?; download_tasks.push(self.download_extension(ext_name, ext_path)); } let results = join_all(download_tasks).await; diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs index 63d428fff4..3c5a6a6d41 100644 --- a/compute_tools/src/http/routes/configure.rs +++ b/compute_tools/src/http/routes/configure.rs @@ -22,7 +22,7 @@ pub(in crate::http) async fn configure( State(compute): State>, request: Json, ) -> Response { - if !compute.live_config_allowed { + if !compute.params.live_config_allowed { return JsonResponse::error( StatusCode::PRECONDITION_FAILED, "live configuration is not allowed for this compute node".to_string(), diff --git a/compute_tools/src/http/routes/extension_server.rs b/compute_tools/src/http/routes/extension_server.rs index b0265d1e99..563b73ae65 100644 --- a/compute_tools/src/http/routes/extension_server.rs +++ b/compute_tools/src/http/routes/extension_server.rs @@ -18,11 +18,11 @@ pub(in crate::http) struct ExtensionServerParams { /// Download a remote extension. 
pub(in crate::http) async fn download_extension( Path(filename): Path, - params: Query, + ext_server_params: Query, State(compute): State>, ) -> Response { // Don't even try to download extensions if no remote storage is configured - if compute.ext_remote_storage.is_none() { + if compute.params.ext_remote_storage.is_none() { return JsonResponse::error( StatusCode::PRECONDITION_FAILED, "remote storage is not configured", @@ -46,9 +46,9 @@ pub(in crate::http) async fn download_extension( remote_extensions.get_ext( &filename, - params.is_library, - &compute.build_tag, - &compute.pgversion, + ext_server_params.is_library, + &compute.params.build_tag, + &compute.params.pgversion, ) }; diff --git a/compute_tools/src/logger.rs b/compute_tools/src/logger.rs index 3749dfc844..a65614e94e 100644 --- a/compute_tools/src/logger.rs +++ b/compute_tools/src/logger.rs @@ -1,3 +1,5 @@ +use std::collections::HashMap; +use tracing::info; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::prelude::*; @@ -42,3 +44,50 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result pub fn inlinify(s: &str) -> String { s.replace('\n', "\u{200B}") } + +pub fn startup_context_from_env() -> Option { + // Extract OpenTelemetry context for the startup actions from the + // TRACEPARENT and TRACESTATE env variables, and attach it to the current + // tracing context. + // + // This is used to propagate the context for the 'start_compute' operation + // from the neon control plane. This allows linking together the wider + // 'start_compute' operation that creates the compute container, with the + // startup actions here within the container. + // + // There is no standard for passing context in env variables, but a lot of + // tools use TRACEPARENT/TRACESTATE, so we use that convention too. 
See + // https://github.com/open-telemetry/opentelemetry-specification/issues/740 + // + // Switch to the startup context here, and exit it once the startup has + // completed and Postgres is up and running. + // + // If this pod is pre-created without binding it to any particular endpoint + // yet, this isn't the right place to enter the startup context. In that + // case, the control plane should pass the tracing context as part of the + // /configure API call. + // + // NOTE: This is supposed to only cover the *startup* actions. Once + // postgres is configured and up-and-running, we exit this span. Any other + // actions that are performed on incoming HTTP requests, for example, are + // performed in separate spans. + // + // XXX: If the pod is restarted, we perform the startup actions in the same + // context as the original startup actions, which probably doesn't make + // sense. + let mut startup_tracing_carrier: HashMap = HashMap::new(); + if let Ok(val) = std::env::var("TRACEPARENT") { + startup_tracing_carrier.insert("traceparent".to_string(), val); + } + if let Ok(val) = std::env::var("TRACESTATE") { + startup_tracing_carrier.insert("tracestate".to_string(), val); + } + if !startup_tracing_carrier.is_empty() { + use opentelemetry::propagation::TextMapPropagator; + use opentelemetry_sdk::propagation::TraceContextPropagator; + info!("got startup tracing context from env variables"); + Some(TraceContextPropagator::new().extract(&startup_tracing_carrier)) + } else { + None + } +} diff --git a/compute_tools/src/monitor.rs b/compute_tools/src/monitor.rs index 248505e473..83318538cd 100644 --- a/compute_tools/src/monitor.rs +++ b/compute_tools/src/monitor.rs @@ -18,7 +18,7 @@ const MONITOR_CHECK_INTERVAL: Duration = Duration::from_millis(500); // should be handled gracefully. 
fn watch_compute_activity(compute: &ComputeNode) { // Suppose that `connstr` doesn't change - let connstr = compute.connstr.clone(); + let connstr = compute.params.connstr.clone(); let conf = compute.get_conn_conf(Some("compute_ctl:activity_monitor")); // During startup and configuration we connect to every Postgres database, From 38ddfab6433e80ede4947c83a1ee2a53a1f073d9 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Mon, 3 Mar 2025 02:29:37 +0200 Subject: [PATCH 065/207] compute_ctl: Perform more startup actions in parallel (#11008) To speed up compute startup. Resizing swap in particular takes about 100 ms on my laptop. By performing it in parallel with downloading the basebackup, that latency is effectively hidden. I would imagine that downloading remote extensions can also take a non-trivial amount of time, although I didn't try to measure that. In any case that's now also performed in parallel with downloading the basebackup. --- compute_tools/src/compute.rs | 178 ++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 63 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 9e065e84a4..a89d3345c1 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -23,7 +23,7 @@ use postgres::NoTls; use postgres::error::SqlState; use remote_storage::{DownloadError, RemotePath}; use tokio::spawn; -use tracing::{debug, error, info, instrument, warn}; +use tracing::{Instrument, debug, error, info, instrument, warn}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; use utils::measured_stream::MeasuredReader; @@ -522,9 +522,13 @@ impl ComputeNode { /// - status is left in ComputeStatus::Init. The caller is responsible for setting it to Failed /// - if Postgres was started before the fatal error happened, self.running_postgres is /// set. The caller is responsible for killing it. + /// + /// Note that this is in the critical path of a compute cold start. Keep this fast. 
+ /// Try to do things concurrently, to hide the latencies. fn start_compute(self: &Arc, pg_handle: &mut Option) -> Result<()> { let compute_state: ComputeState; + let start_compute_span; let _this_entered; { let mut state_guard = self.state.lock().unwrap(); @@ -536,7 +540,7 @@ impl ComputeNode { // startup to be considered part of the /configure request. // // Similarly, if a trace ID was passed in env variables, attach it to the span. - _this_entered = { + start_compute_span = { // Temporarily enter the parent span, so that the new span becomes its child. if let Some(p) = state_guard.startup_span.take() { let _parent_entered = p.entered(); @@ -549,8 +553,8 @@ impl ComputeNode { } else { tracing::info_span!("start_compute") } - } - .entered(); + }; + _this_entered = start_compute_span.enter(); state_guard.set_status(ComputeStatus::Init, &self.state_changed); compute_state = state_guard.clone() @@ -567,23 +571,44 @@ impl ComputeNode { pspec.spec.remote_extensions, ); - // Launch remaining service threads - let _monitor_handle = launch_monitor(self); - let _configurator_handle = launch_configurator(self); + ////// PRE-STARTUP PHASE: things that need to be finished before we start the Postgres process + + // Collect all the tasks that must finish here + let mut pre_tasks = tokio::task::JoinSet::new(); + + // If there are any remote extensions in shared_preload_libraries, start downloading them + if pspec.spec.remote_extensions.is_some() { + let (this, spec) = (self.clone(), pspec.spec.clone()); + pre_tasks.spawn(async move { + this.download_preload_extensions(&spec) + .in_current_span() + .await + }); + } + + // Prepare pgdata directory. This downloads the basebackup, among other things. 
+ { + let (this, cs) = (self.clone(), compute_state.clone()); + pre_tasks.spawn_blocking_child(move || this.prepare_pgdata(&cs)); + } // Resize swap to the desired size if the compute spec says so if let (Some(size_bytes), true) = (pspec.spec.swap_size_bytes, self.params.resize_swap_on_bind) { - // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion - // *before* starting postgres. - // - // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this - // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets - // OOM-killed during startup because swap wasn't available yet. - resize_swap(size_bytes).context("failed to resize swap")?; - let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display. - info!(%size_bytes, %size_mib, "resized swap"); + pre_tasks.spawn_blocking_child(move || { + // To avoid 'swapoff' hitting postgres startup, we need to run resize-swap to completion + // *before* starting postgres. + // + // In theory, we could do this asynchronously if SkipSwapon was enabled for VMs, but this + // carries a risk of introducing hard-to-debug issues - e.g. if postgres sometimes gets + // OOM-killed during startup because swap wasn't available yet. + resize_swap(size_bytes).context("failed to resize swap")?; + let size_mib = size_bytes as f32 / (1 << 20) as f32; // just for more coherent display. + info!(%size_bytes, %size_mib, "resized swap"); + + Ok::<(), anyhow::Error>(()) + }); } // Set disk quota if the compute spec says so @@ -591,10 +616,15 @@ impl ComputeNode { pspec.spec.disk_quota_bytes, self.params.set_disk_quota_for_fs.as_ref(), ) { - set_disk_quota(disk_quota_bytes, disk_quota_fs_mountpoint) - .context("failed to set disk quota")?; - let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. 
- info!(%disk_quota_bytes, %size_mib, "set disk quota"); + let disk_quota_fs_mountpoint = disk_quota_fs_mountpoint.clone(); + pre_tasks.spawn_blocking_child(move || { + set_disk_quota(disk_quota_bytes, &disk_quota_fs_mountpoint) + .context("failed to set disk quota")?; + let size_mib = disk_quota_bytes as f32 / (1 << 20) as f32; // just for more coherent display. + info!(%disk_quota_bytes, %size_mib, "set disk quota"); + + Ok::<(), anyhow::Error>(()) + }); } // tune pgbouncer @@ -628,37 +658,17 @@ impl ComputeNode { }); } - // This part is sync, because we need to download - // remote shared_preload_libraries before postgres start (if any) - if let Some(remote_extensions) = &pspec.spec.remote_extensions { - // First, create control files for all availale extensions - extension_server::create_control_files(remote_extensions, &self.params.pgbin); + // Launch remaining service threads + let _monitor_handle = launch_monitor(self); + let _configurator_handle = launch_configurator(self); - let library_load_start_time = Utc::now(); - let rt = tokio::runtime::Handle::current(); - let remote_ext_metrics = rt.block_on(self.prepare_preload_libraries(&pspec.spec))?; - - let library_load_time = Utc::now() - .signed_duration_since(library_load_start_time) - .to_std() - .unwrap() - .as_millis() as u64; - let mut state = self.state.lock().unwrap(); - state.metrics.load_ext_ms = library_load_time; - state.metrics.num_ext_downloaded = remote_ext_metrics.num_ext_downloaded; - state.metrics.largest_ext_size = remote_ext_metrics.largest_ext_size; - state.metrics.total_ext_download_size = remote_ext_metrics.total_ext_download_size; - info!( - "Loading shared_preload_libraries took {:?}ms", - library_load_time - ); - info!("{:?}", remote_ext_metrics); + // Wait for all the pre-tasks to finish before starting postgres + let rt = tokio::runtime::Handle::current(); + while let Some(res) = rt.block_on(pre_tasks.join_next()) { + res??; } - // Prepre pgdata directory. 
This downloads the basebackup, among other things. - self.prepare_pgdata(&compute_state)?; - - // Start Postgres + ////// START POSTGRES let start_time = Utc::now(); let pg_process = self.start_postgres(pspec.storage_auth_token.clone())?; let postmaster_pid = pg_process.pid(); @@ -669,6 +679,7 @@ impl ComputeNode { let config_time = Utc::now(); if pspec.spec.mode == ComputeMode::Primary { self.configure_as_primary(&compute_state)?; + let conf = self.get_conn_conf(None); tokio::task::spawn_blocking(|| { let res = get_installed_extensions(conf); @@ -714,6 +725,39 @@ impl ComputeNode { Ok(()) } + #[instrument(skip_all)] + async fn download_preload_extensions(&self, spec: &ComputeSpec) -> Result<()> { + let remote_extensions = if let Some(remote_extensions) = &spec.remote_extensions { + remote_extensions + } else { + return Ok(()); + }; + + // First, create control files for all available extensions + extension_server::create_control_files(remote_extensions, &self.params.pgbin); + + let library_load_start_time = Utc::now(); + let remote_ext_metrics = self.prepare_preload_libraries(spec).await?; + + let library_load_time = Utc::now() + .signed_duration_since(library_load_start_time) + .to_std() + .unwrap() + .as_millis() as u64; + let mut state = self.state.lock().unwrap(); + state.metrics.load_ext_ms = library_load_time; + state.metrics.num_ext_downloaded = remote_ext_metrics.num_ext_downloaded; + state.metrics.largest_ext_size = remote_ext_metrics.largest_ext_size; + state.metrics.total_ext_download_size = remote_ext_metrics.total_ext_download_size; + info!( + "Loading shared_preload_libraries took {:?}ms", + library_load_time + ); + info!("{:?}", remote_ext_metrics); + + Ok(()) + } + /// Start the vm-monitor if directed to. The vm-monitor only runs on linux /// because it requires cgroups. 
fn start_vm_monitor(&self, disable_lfc_resizing: bool) -> StartVmMonitorResult { @@ -1574,21 +1618,6 @@ impl ComputeNode { } self.post_apply_config()?; - let conf = self.get_conn_conf(None); - tokio::task::spawn_blocking(|| { - let res = get_installed_extensions(conf); - match res { - Ok(extensions) => { - info!( - "[NEON_EXT_STAT] {}", - serde_json::to_string(&extensions) - .expect("failed to serialize extensions list") - ); - } - Err(err) => error!("could not get installed extensions: {err:?}"), - } - }); - Ok(()) } @@ -2030,3 +2059,26 @@ pub fn forward_termination_signal() { kill(pg_pid, Signal::SIGINT).ok(); } } + +// helper trait to call JoinSet::spawn_blocking(f), but propagates the current +// tracing span to the thread. +trait JoinSetExt { + fn spawn_blocking_child(&mut self, f: F) -> tokio::task::AbortHandle + where + F: FnOnce() -> T + Send + 'static, + T: Send; +} + +impl JoinSetExt for tokio::task::JoinSet { + fn spawn_blocking_child(&mut self, f: F) -> tokio::task::AbortHandle + where + F: FnOnce() -> T + Send + 'static, + T: Send, + { + let sp = tracing::Span::current(); + self.spawn_blocking(move || { + let _e = sp.enter(); + f() + }) + } +} From df0767176ad2dee3bfaa0fef30f33959e08ca2e6 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Mon, 3 Mar 2025 10:40:49 +0100 Subject: [PATCH 066/207] Change the tags names according to the curent state (#11059) ## Problem We have not synced `force-test-extensions-upgrade.yml` with the last changes. The variable `TEST_EXTENSIONS_UPGRADE` was ignored in the script and actually set to `NEW_COMPUTE_TAG` while it should be set to `OLD_COMPUTE_TAG` as we are about to run compatibility tests. ## Summary of changes The tag names were synced, the logic was fixed. 
--- .github/workflows/force-test-extensions-upgrade.yml | 5 +++-- docker-compose/test_extensions_upgrade.sh | 11 +++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/workflows/force-test-extensions-upgrade.yml b/.github/workflows/force-test-extensions-upgrade.yml index 71c5158ef6..f2376306dc 100644 --- a/.github/workflows/force-test-extensions-upgrade.yml +++ b/.github/workflows/force-test-extensions-upgrade.yml @@ -52,8 +52,9 @@ jobs: - name: Test extension upgrade timeout-minutes: 20 env: - NEWTAG: latest - OLDTAG: ${{ steps.get-last-compute-release-tag.outputs.tag }} + NEW_COMPUTE_TAG: latest + OLD_COMPUTE_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }} + TEST_EXTENSIONS_TAG: ${{ steps.get-last-compute-release-tag.outputs.tag }} PG_VERSION: ${{ matrix.pg-version }} FORCE_ALL_UPGRADE_TESTS: true run: ./docker-compose/test_extensions_upgrade.sh diff --git a/docker-compose/test_extensions_upgrade.sh b/docker-compose/test_extensions_upgrade.sh index 57c0182162..51d1e40802 100755 --- a/docker-compose/test_extensions_upgrade.sh +++ b/docker-compose/test_extensions_upgrade.sh @@ -6,8 +6,11 @@ generate_id() { local -n resvar=$1 printf -v resvar '%08x%08x%08x%08x' $SRANDOM $SRANDOM $SRANDOM $SRANDOM } -if [ -z ${OLD_COMPUTE_TAG+x} ] || [ -z ${NEW_COMPUTE_TAG+x} ] || [ -z "${OLD_COMPUTE_TAG}" ] || [ -z "${NEW_COMPUTE_TAG}" ]; then - echo OLD_COMPUTE_TAG and NEW_COMPUTE_TAG must be defined +echo "${OLD_COMPUTE_TAG}" +echo "${NEW_COMPUTE_TAG}" +echo "${TEST_EXTENSIONS_TAG}" +if [ -z "${OLD_COMPUTE_TAG:-}" ] || [ -z "${NEW_COMPUTE_TAG:-}" ] || [ -z "${TEST_EXTENSIONS_TAG:-}" ]; then + echo OLD_COMPUTE_TAG, NEW_COMPUTE_TAG and TEST_EXTENSIONS_TAG must be set exit 1 fi export PG_VERSION=${PG_VERSION:-16} @@ -82,7 +85,7 @@ EXTENSIONS='[ {"extname": "pg_repack", "extdir": "pg_repack-src"} ]' EXTNAMES=$(echo ${EXTENSIONS} | jq -r '.[].extname' | paste -sd ' ' -) -COMPUTE_TAG=${NEW_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker 
compose --profile test-extensions up --quiet-pull --build -d +COMPUTE_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" @@ -90,7 +93,7 @@ create_extensions "${EXTNAMES}" query="select json_object_agg(extname,extversion) from pg_extension where extname in ('${EXTNAMES// /\',\'}')" new_vers=$(docker compose exec neon-test-extensions psql -Aqt -d contrib_regression -c "$query") docker compose --profile test-extensions down -COMPUTE_TAG=${OLD_COMPUTE_TAG} TEST_EXTENSIONS_TAG=${NEW_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate +COMPUTE_TAG=${OLD_COMPUTE_TAG} docker compose --profile test-extensions up --quiet-pull --build -d --force-recreate wait_for_ready docker compose exec neon-test-extensions psql -c "DROP DATABASE IF EXISTS contrib_regression" docker compose exec neon-test-extensions psql -c "CREATE DATABASE contrib_regression" From 625c526bdd271d845b9e42c2aae197c8b09a0d77 Mon Sep 17 00:00:00 2001 From: Misha Sakhnov Date: Mon, 3 Mar 2025 13:47:09 +0200 Subject: [PATCH 067/207] ci: create multiarch vm images (#11017) ## Problem We build compute-nodes as multi-arch images, but not the vm-compute-nodes. The PR adds multiarch vm images the same way as in autoscaling repo. 
## Summary of changes Add architecture to the matrix for vm compute build steps Add merge job --------- Co-authored-by: Alexander Bayandin --- .github/workflows/build_and_test.yml | 39 +++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index fb6da2f173..ac6e0634f0 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -692,15 +692,15 @@ jobs: neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-x64 \ neondatabase/neon-test-extensions-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.version.debian }}-arm64 - vm-compute-node-image: + vm-compute-node-image-arch: needs: [ check-permissions, meta, compute-node-image ] if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} - runs-on: [ self-hosted, large ] + runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} strategy: fail-fast: false matrix: + arch: [ amd64, arm64 ] version: - # see the comment for `compute-node-image-arch` job - pg: v14 debian: bullseye - pg: v15 @@ -717,7 +717,7 @@ jobs: - name: Downloading vm-builder run: | - curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-amd64 -o vm-builder + curl -fL https://github.com/neondatabase/autoscaling/releases/download/$VM_BUILDER_VERSION/vm-builder-${{ matrix.arch }} -o vm-builder chmod +x vm-builder - uses: neondatabase/dev-actions/set-docker-config-dir@6094485bf440001c94a94a3f9e221e81ff6b6193 @@ -738,12 +738,37 @@ jobs: -size=2G \ -spec=compute/vm-image-spec-${{ matrix.version.debian }}.yaml \ -src=neondatabase/compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ - -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ 
needs.meta.outputs.build-tag }} \ - -target-arch=linux/amd64 + -dst=neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} \ + -target-arch=linux/${{ matrix.arch }} - name: Pushing vm-compute-node image run: | - docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} + docker push neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-${{ matrix.arch }} + + vm-compute-node-image: + needs: [ vm-compute-node-image-arch, meta ] + if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + runs-on: ubuntu-22.04 + strategy: + matrix: + version: + # see the comment for `compute-node-image-arch` job + - pg: v14 + - pg: v15 + - pg: v16 + - pg: v17 + steps: + - uses: docker/login-action@v3 + with: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + + - name: Create multi-arch compute-node image + run: | + docker buildx imagetools create -t neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }} \ + neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-amd64 \ + neondatabase/vm-compute-node-${{ matrix.version.pg }}:${{ needs.meta.outputs.build-tag }}-arm64 + test-images: needs: [ check-permissions, meta, neon-image, compute-node-image ] From 8669bfe4937dafc36f430cbc2401b84f186ca81c Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 3 Mar 2025 14:50:07 +0200 Subject: [PATCH 068/207] Do not store zero pages in inmem SMGR for walredo (#11043) ## Problem See https://neondb.slack.com/archives/C033RQ5SPDH/p1740157873114339 smgrextend for FSM fork is called during page reconstruction by walredo process causing overflow of inmem SMGR (64 pages). 
## Summary of changes Do not store zero pages in inmem SMGR because `inmem_read` returns zero page if it is not able to locate specified block. Co-authored-by: Konstantin Knizhnik --- pgxn/neon_walredo/inmem_smgr.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pgxn/neon_walredo/inmem_smgr.c b/pgxn/neon_walredo/inmem_smgr.c index ff2846a9e7..75b9ab4464 100644 --- a/pgxn/neon_walredo/inmem_smgr.c +++ b/pgxn/neon_walredo/inmem_smgr.c @@ -32,8 +32,8 @@ #include "inmem_smgr.h" -/* Size of the in-memory smgr: XLR_MAX_BLOCK_ID is 32, but we can update up to 3 forks for each block */ -#define MAX_PAGES 100 +/* Size of the in-memory smgr: XLR_MAX_BLOCK_ID is 32, so assume that 64 will be enough */ +#define MAX_PAGES 64 /* If more than WARN_PAGES are used, print a warning in the log */ #define WARN_PAGES 32 @@ -174,10 +174,7 @@ static void inmem_zeroextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, int nblocks, bool skipFsync) { - char buffer[BLCKSZ] = {0}; - - for (int i = 0; i < nblocks; i++) - inmem_extend(reln, forknum, blocknum + i, buffer, skipFsync); + /* Do nothing: inmem_read will return zero page in any case */ } #endif From ef2b50994cab9702a84aca490d70f11fb0d1036b Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Mon, 3 Mar 2025 16:20:20 +0300 Subject: [PATCH 069/207] walproposer: basic infra to enable generations (#11002) ## Problem Preparation for https://github.com/neondatabase/neon/issues/10851 ## Summary of changes Add walproposer `safekeepers_generations` field which can be set by prefixing `neon.safekeepers` GUC with `g#n:`. Non zero value (n) forces walproposer to use generations. In particular, this also disables implicit timeline creation as timeline will be created by storcon. Add test checking this. Also add missing infra: `--safekeepers-generation` flag to neon_local endpoint start + fix `--start-timeout` flag: it existed but value wasn't used. 
--- compute_tools/src/config.rs | 14 ++++- control_plane/src/bin/neon_local.rs | 20 +++++-- control_plane/src/endpoint.rs | 25 ++++++--- libs/compute_api/src/spec.rs | 11 ++++ pgxn/neon/walproposer.c | 69 ++++++++++++++++++++++-- pgxn/neon/walproposer.h | 19 ++++++- test_runner/fixtures/neon_cli.py | 6 +++ test_runner/fixtures/neon_fixtures.py | 8 ++- test_runner/regress/test_wal_acceptor.py | 48 +++++++++++++++++ 9 files changed, 200 insertions(+), 20 deletions(-) diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index e8056ec7eb..ca24ff76b3 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -1,5 +1,7 @@ +use std::fmt::Write as FmtWrite; use std::fs::{File, OpenOptions}; use std::io; +use std::io::Write; use std::io::prelude::*; use std::path::Path; @@ -55,10 +57,20 @@ pub fn write_postgres_conf( writeln!(file, "neon.stripe_size={stripe_size}")?; } if !spec.safekeeper_connstrings.is_empty() { + let mut neon_safekeepers_value = String::new(); + tracing::info!( + "safekeepers_connstrings is not zero, gen: {:?}", + spec.safekeepers_generation + ); + // If generation is given, prepend sk list with g#number: + if let Some(generation) = spec.safekeepers_generation { + write!(neon_safekeepers_value, "g#{}:", generation)?; + } + neon_safekeepers_value.push_str(&spec.safekeeper_connstrings.join(",")); writeln!( file, "neon.safekeepers={}", - escape_conf_value(&spec.safekeeper_connstrings.join(",")) + escape_conf_value(&neon_safekeepers_value) )?; } if let Some(s) = &spec.tenant_id { diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index f258025428..375b5d87d0 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -40,6 +40,7 @@ use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInf use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId}; use postgres_backend::AuthType; use 
postgres_connection::parse_host_port; +use safekeeper_api::membership::SafekeeperGeneration; use safekeeper_api::{ DEFAULT_HTTP_LISTEN_PORT as DEFAULT_SAFEKEEPER_HTTP_PORT, DEFAULT_PG_LISTEN_PORT as DEFAULT_SAFEKEEPER_PG_PORT, @@ -596,7 +597,15 @@ struct EndpointStartCmdArgs { #[clap(long = "pageserver-id")] endpoint_pageserver_id: Option, - #[clap(long)] + #[clap( + long, + help = "Safekeepers membership generation to prefix neon.safekeepers with. Normally neon_local sets it on its own, but this option allows to override. Non zero value forces endpoint to use membership configurations." + )] + safekeepers_generation: Option, + #[clap( + long, + help = "List of safekeepers endpoint will talk to. Normally neon_local chooses them on its own, but this option allows to override." + )] safekeepers: Option, #[clap( @@ -617,9 +626,9 @@ struct EndpointStartCmdArgs { )] allow_multiple: bool, - #[clap(short = 't', long, help = "timeout until we fail the command")] - #[arg(default_value = "10s")] - start_timeout: humantime::Duration, + #[clap(short = 't', long, value_parser= humantime::parse_duration, help = "timeout until we fail the command")] + #[arg(default_value = "90s")] + start_timeout: Duration, } #[derive(clap::Args)] @@ -1350,6 +1359,7 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res let pageserver_id = args.endpoint_pageserver_id; let remote_ext_config = &args.remote_ext_config; + let safekeepers_generation = args.safekeepers_generation.map(SafekeeperGeneration::new); // If --safekeepers argument is given, use only the listed // safekeeper nodes; otherwise all from the env. let safekeepers = if let Some(safekeepers) = parse_safekeepers(&args.safekeepers)? 
{ @@ -1425,11 +1435,13 @@ async fn handle_endpoint(subcmd: &EndpointCmd, env: &local_env::LocalEnv) -> Res endpoint .start( &auth_token, + safekeepers_generation, safekeepers, pageservers, remote_ext_config.as_ref(), stripe_size.0 as usize, args.create_test_user, + args.start_timeout, ) .await?; } diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 50ccca36fe..87bfbd7570 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -42,7 +42,7 @@ use std::path::PathBuf; use std::process::Command; use std::str::FromStr; use std::sync::Arc; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; use anyhow::{Context, Result, anyhow, bail}; use compute_api::requests::ConfigurationRequest; @@ -53,6 +53,7 @@ use compute_api::spec::{ use nix::sys::signal::{Signal, kill}; use pageserver_api::shard::ShardStripeSize; use reqwest::header::CONTENT_TYPE; +use safekeeper_api::membership::SafekeeperGeneration; use serde::{Deserialize, Serialize}; use tracing::debug; use url::Host; @@ -576,14 +577,17 @@ impl Endpoint { Ok(safekeeper_connstrings) } + #[allow(clippy::too_many_arguments)] pub async fn start( &self, auth_token: &Option, + safekeepers_generation: Option, safekeepers: Vec, pageservers: Vec<(Host, u16)>, remote_ext_config: Option<&String>, shard_stripe_size: usize, create_test_user: bool, + start_timeout: Duration, ) -> Result<()> { if self.status() == EndpointStatus::Running { anyhow::bail!("The endpoint is already running"); @@ -655,6 +659,7 @@ impl Endpoint { timeline_id: Some(self.timeline_id), mode: self.mode, pageserver_connstring: Some(pageserver_connstring), + safekeepers_generation: safekeepers_generation.map(|g| g.into_inner()), safekeeper_connstrings, storage_auth_token: auth_token.clone(), remote_extensions, @@ -770,17 +775,18 @@ impl Endpoint { std::fs::write(pidfile_path, pid.to_string())?; // Wait for it to start - let mut attempt = 0; const 
ATTEMPT_INTERVAL: Duration = Duration::from_millis(100); - const MAX_ATTEMPTS: u32 = 10 * 90; // Wait up to 1.5 min + let start_at = Instant::now(); loop { - attempt += 1; match self.get_status().await { Ok(state) => { match state.status { ComputeStatus::Init => { - if attempt == MAX_ATTEMPTS { - bail!("compute startup timed out; still in Init state"); + if Instant::now().duration_since(start_at) > start_timeout { + bail!( + "compute startup timed out {:?}; still in Init state", + start_timeout + ); } // keep retrying } @@ -807,8 +813,11 @@ impl Endpoint { } } Err(e) => { - if attempt == MAX_ATTEMPTS { - return Err(e).context("timed out waiting to connect to compute_ctl HTTP"); + if Instant::now().duration_since(start_at) > start_timeout { + return Err(e).context(format!( + "timed out {:?} waiting to connect to compute_ctl HTTP", + start_timeout, + )); } } } diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index d02bfd6814..df82d8b449 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -101,6 +101,17 @@ pub struct ComputeSpec { pub timeline_id: Option, pub pageserver_connstring: Option, + /// Safekeeper membership config generation. It is put in + /// neon.safekeepers GUC and serves two purposes: + /// 1) Non zero value forces walproposer to use membership configurations. + /// 2) If walproposer wants to update list of safekeepers to connect to + /// taking them from some safekeeper mconf, it should check what value + /// is newer by comparing the generation. + /// + /// Note: it could be SafekeeperGeneration, but this needs linking + /// compute_ctl with postgres_ffi. 
+ #[serde(default)] + pub safekeepers_generation: Option, #[serde(default)] pub safekeeper_connstrings: Vec, diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 356895aa82..7ec4ec99fc 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -83,6 +83,7 @@ static void AssertEventsOkForState(uint32 events, Safekeeper *sk); static char *FormatEvents(WalProposer *wp, uint32 events); static void UpdateDonorShmem(WalProposer *wp); static char *MembershipConfigurationToString(MembershipConfiguration *mconf); +static void MembershipConfigurationCopy(MembershipConfiguration *src, MembershipConfiguration *dst); static void MembershipConfigurationFree(MembershipConfiguration *mconf); WalProposer * @@ -97,7 +98,32 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) wp->config = config; wp->api = api; - for (host = wp->config->safekeepers_list; host != NULL && *host != '\0'; host = sep) + wp_log(LOG, "neon.safekeepers=%s", wp->config->safekeepers_list); + + /* + * If safekeepers list starts with g# parse generation number followed by + * : + */ + if (strncmp(wp->config->safekeepers_list, "g#", 2) == 0) + { + char *endptr; + + errno = 0; + wp->safekeepers_generation = strtoul(wp->config->safekeepers_list + 2, &endptr, 10); + if (errno != 0) + { + wp_log(FATAL, "failed to parse neon.safekeepers generation number: %m"); + } + /* Skip past : to the first hostname. */ + host = endptr + 1; + } + else + { + host = wp->config->safekeepers_list; + } + wp_log(LOG, "safekeepers_generation=%u", wp->safekeepers_generation); + + for (; host != NULL && *host != '\0'; host = sep) { port = strchr(host, ':'); if (port == NULL) @@ -183,6 +209,12 @@ WalProposerFree(WalProposer *wp) pfree(wp); } +static bool +WalProposerGenerationsEnabled(WalProposer *wp) +{ + return wp->safekeepers_generation != 0; +} + /* * Create new AppendRequest message and start sending it. This function is * called from walsender every time the new WAL is available. 
@@ -600,10 +632,14 @@ static void SendStartWALPush(Safekeeper *sk) { WalProposer *wp = sk->wp; + + /* Forbid implicit timeline creation if generations are enabled. */ + char *allow_timeline_creation = WalProposerGenerationsEnabled(wp) ? "false" : "true"; #define CMD_LEN 512 char cmd[CMD_LEN]; - snprintf(cmd, CMD_LEN, "START_WAL_PUSH (proto_version '%d')", wp->config->proto_version); + + snprintf(cmd, CMD_LEN, "START_WAL_PUSH (proto_version '%d', allow_timeline_creation '%s')", wp->config->proto_version, allow_timeline_creation); if (!wp->api.conn_send_query(sk, cmd)) { wp_log(WARNING, "failed to send '%s' query to safekeeper %s:%s: %s", @@ -705,6 +741,18 @@ RecvAcceptorGreeting(Safekeeper *sk) sk->host, sk->port, sk->greetResponse.nodeId, mconf_toml, sk->greetResponse.term); pfree(mconf_toml); + /* + * Adopt mconf of safekeepers if it is higher. TODO: mconf change should + * restart wp if it started voting. + */ + if (sk->greetResponse.mconf.generation > wp->mconf.generation) + { + MembershipConfigurationFree(&wp->mconf); + MembershipConfigurationCopy(&sk->greetResponse.mconf, &wp->mconf); + /* full conf was just logged above */ + wp_log(LOG, "changed mconf to generation %u", wp->mconf.generation); + } + /* Protocol is all good, move to voting. */ sk->state = SS_VOTING; @@ -1896,7 +1944,8 @@ PAMessageSerialize(WalProposer *wp, ProposerAcceptorMessage *msg, StringInfo buf pq_sendint64_le(buf, m->termHistory->entries[i].term); pq_sendint64_le(buf, m->termHistory->entries[i].lsn); } - /* + + /* * Removed timeline_start_lsn. Still send it as a valid * value until safekeepers taking it from term history are * deployed. 
@@ -2162,7 +2211,7 @@ AsyncReadMessage(Safekeeper *sk, AcceptorProposerMessage *anymsg) } } wp_log(FATAL, "unsupported proto_version %d", wp->config->proto_version); - return false; /* keep the compiler quiet */ + return false; /* keep the compiler quiet */ } /* @@ -2570,6 +2619,18 @@ MembershipConfigurationToString(MembershipConfiguration *mconf) return s.data; } +static void +MembershipConfigurationCopy(MembershipConfiguration *src, MembershipConfiguration *dst) +{ + dst->generation = src->generation; + dst->members.len = src->members.len; + dst->members.m = palloc0(sizeof(SafekeeperId) * dst->members.len); + memcpy(dst->members.m, src->members.m, sizeof(SafekeeperId) * dst->members.len); + dst->new_members.len = src->new_members.len; + dst->new_members.m = palloc0(sizeof(SafekeeperId) * dst->new_members.len); + memcpy(dst->new_members.m, src->new_members.m, sizeof(SafekeeperId) * dst->new_members.len); +} + static void MembershipConfigurationFree(MembershipConfiguration *mconf) { diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h index eee55f924f..8d1ae26cac 100644 --- a/pgxn/neon/walproposer.h +++ b/pgxn/neon/walproposer.h @@ -160,7 +160,10 @@ typedef struct MemberSet SafekeeperId *m; /* ids themselves */ } MemberSet; -/* Timeline safekeeper membership configuration. */ +/* + * Timeline safekeeper membership configuration as sent in the + * protocol. + */ typedef struct MembershipConfiguration { Generation generation; @@ -761,8 +764,22 @@ typedef struct WalProposer /* (n_safekeepers / 2) + 1 */ int quorum; + /* + * Generation of the membership conf of which safekeepers[] are presumably + * members. To make cplane life a bit easier and have more control in + * tests with which sks walproposer gets connected neon.safekeepers GUC + * doesn't provide full mconf, only the list of endpoints to connect to. 
+ * We still would like to know generation associated with it because 1) we + * need some handle to enforce using generations in walproposer, and + * non-zero value of this serves the purpose; 2) currently we don't do + * that, but in theory walproposer can update list of safekeepers to + * connect to upon receiving mconf from safekeepers, and generation number + * must be checked to see which list is newer. + */ + Generation safekeepers_generation; /* Number of occupied slots in safekeepers[] */ int n_safekeepers; + /* Safekeepers walproposer is connecting to. */ Safekeeper safekeeper[MAX_SAFEKEEPERS]; /* WAL has been generated up to this point */ diff --git a/test_runner/fixtures/neon_cli.py b/test_runner/fixtures/neon_cli.py index 97a5a36814..6e53987e7c 100644 --- a/test_runner/fixtures/neon_cli.py +++ b/test_runner/fixtures/neon_cli.py @@ -525,12 +525,14 @@ class NeonLocalCli(AbstractNeonCli): def endpoint_start( self, endpoint_id: str, + safekeepers_generation: int | None = None, safekeepers: list[int] | None = None, remote_ext_config: str | None = None, pageserver_id: int | None = None, allow_multiple: bool = False, create_test_user: bool = False, basebackup_request_tries: int | None = None, + timeout: str | None = None, env: dict[str, str] | None = None, ) -> subprocess.CompletedProcess[str]: args = [ @@ -543,6 +545,8 @@ class NeonLocalCli(AbstractNeonCli): if remote_ext_config is not None: args.extend(["--remote-ext-config", remote_ext_config]) + if safekeepers_generation is not None: + args.extend(["--safekeepers-generation", str(safekeepers_generation)]) if safekeepers is not None: args.extend(["--safekeepers", (",".join(map(str, safekeepers)))]) if endpoint_id is not None: @@ -553,6 +557,8 @@ class NeonLocalCli(AbstractNeonCli): args.extend(["--allow-multiple"]) if create_test_user: args.extend(["--create-test-user"]) + if timeout is not None: + args.extend(["--start-timeout", str(timeout)]) res = self.raw_cli(args, extra_env_vars) res.check_returncode() 
diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 6001003e53..53df10be49 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4005,10 +4005,12 @@ class Endpoint(PgProtocol, LogUtils): self, remote_ext_config: str | None = None, pageserver_id: int | None = None, + safekeeper_generation: int | None = None, safekeepers: list[int] | None = None, allow_multiple: bool = False, create_test_user: bool = False, basebackup_request_tries: int | None = None, + timeout: str | None = None, env: dict[str, str] | None = None, ) -> Self: """ @@ -4018,19 +4020,21 @@ class Endpoint(PgProtocol, LogUtils): assert self.endpoint_id is not None - # If `safekeepers` is not None, they are remember them as active and use - # in the following commands. + # If `safekeepers` is not None, remember them as active and use in the + # following commands. if safekeepers is not None: self.active_safekeepers = safekeepers self.env.neon_cli.endpoint_start( self.endpoint_id, + safekeepers_generation=safekeeper_generation, safekeepers=self.active_safekeepers, remote_ext_config=remote_ext_config, pageserver_id=pageserver_id, allow_multiple=allow_multiple, create_test_user=create_test_user, basebackup_request_tries=basebackup_request_tries, + timeout=timeout, env=env, ) self._running.release(1) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 0a05189bfb..8f70b460c6 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -2281,6 +2281,54 @@ def test_membership_api(neon_env_builder: NeonEnvBuilder): http_cli.timeline_status(tenant_id, timeline_id) +def test_explicit_timeline_creation(neon_env_builder: NeonEnvBuilder): + """ + Test that having neon.safekeepers starting with g#n: with non zero n enables + generations, which as a side effect disables automatic timeline creation. 
+ + This is kind of bootstrapping test: here membership conf & timeline is + created manually, later storcon will do that. + """ + neon_env_builder.num_safekeepers = 3 + env = neon_env_builder.init_start() + + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + ps = env.pageservers[0] + ps_http_cli = ps.http_client() + + http_clis = [sk.http_client() for sk in env.safekeepers] + + config_lines = [ + "neon.safekeeper_proto_version = 3", + ] + ep = env.endpoints.create("main", config_lines=config_lines) + + # expected to fail because timeline is not created on safekeepers + with pytest.raises(Exception, match=r".*timed out.*"): + ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3], timeout="2s") + # figure out initial LSN. + ps_timeline_detail = ps_http_cli.timeline_detail(tenant_id, timeline_id) + init_lsn = ps_timeline_detail["last_record_lsn"] + log.info(f"initial LSN: {init_lsn}") + # sk timeline creation request expects minor version + pg_version = ps_timeline_detail["pg_version"] * 10000 + # create inital mconf + sk_ids = [SafekeeperId(sk.id, "localhost", sk.port.pg_tenant_only) for sk in env.safekeepers] + mconf = Configuration(generation=1, members=sk_ids, new_members=None) + create_r = TimelineCreateRequest( + tenant_id, timeline_id, mconf, pg_version, Lsn(init_lsn), commit_lsn=None + ) + log.info(f"sending timeline create: {create_r.to_json()}") + + for sk_http_cli in http_clis: + sk_http_cli.timeline_create(create_r) + # Once timeline created endpoint should start. + ep.start(safekeeper_generation=1, safekeepers=[1, 2, 3]) + ep.safe_psql("CREATE TABLE IF NOT EXISTS t(key int, value text)") + + # In this test we check for excessive START_REPLICATION and START_WAL_PUSH queries # when compute is active, but there are no writes to the timeline. 
In that case # pageserver should maintain a single connection to safekeeper and don't attempt From 38277497fd400e2948293c8e29512be8dc231735 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 3 Mar 2025 13:46:50 +0000 Subject: [PATCH 070/207] pageserver: log shutdown at info level for basebackup (#11046) ## Problem Timeline shutdown during basebackup logs at error level because the cancellation error is smushed into BasebackupError::Server. ## Summary of changes Introduce BasebackupError::Shutdown and use it. `log_query_error` will now see `QueryError::Shutdown` and log at info level. --- pageserver/src/basebackup.rs | 67 +++++++++++++++++++--------------- pageserver/src/page_service.rs | 1 + 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/pageserver/src/basebackup.rs b/pageserver/src/basebackup.rs index ce54bd9c1c..de527e307b 100644 --- a/pageserver/src/basebackup.rs +++ b/pageserver/src/basebackup.rs @@ -33,8 +33,9 @@ use utils::lsn::Lsn; use crate::context::RequestContext; use crate::pgdatadir_mapping::Version; -use crate::tenant::Timeline; use crate::tenant::storage_layer::IoConcurrency; +use crate::tenant::timeline::GetVectoredError; +use crate::tenant::{PageReconstructError, Timeline}; #[derive(Debug, thiserror::Error)] pub enum BasebackupError { @@ -42,6 +43,26 @@ pub enum BasebackupError { Server(#[from] anyhow::Error), #[error("basebackup client error {0:#} when {1}")] Client(#[source] io::Error, &'static str), + #[error("basebackup during shutdown")] + Shutdown, +} + +impl From for BasebackupError { + fn from(value: PageReconstructError) -> Self { + match value { + PageReconstructError::Cancelled => BasebackupError::Shutdown, + err => BasebackupError::Server(err.into()), + } + } +} + +impl From for BasebackupError { + fn from(value: GetVectoredError) -> Self { + match value { + GetVectoredError::Cancelled => BasebackupError::Shutdown, + err => BasebackupError::Server(err.into()), + } + } } /// Create basebackup with non-rel data in
it. @@ -127,7 +148,7 @@ where timeline .gate .enter() - .map_err(|e| BasebackupError::Server(e.into()))?, + .map_err(|_| BasebackupError::Shutdown)?, ), }; basebackup @@ -323,8 +344,7 @@ where let slru_partitions = self .timeline .get_slru_keyspace(Version::Lsn(self.lsn), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))? + .await? .partition( self.timeline.get_shard_identity(), Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64, @@ -336,11 +356,10 @@ where let blocks = self .timeline .get_vectored(part, self.lsn, self.io_concurrency.clone(), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; for (key, block) in blocks { - let block = block.map_err(|e| BasebackupError::Server(e.into()))?; + let block = block?; slru_builder.add_block(&key, block).await?; } } @@ -349,11 +368,8 @@ where let mut min_restart_lsn: Lsn = Lsn::MAX; // Create tablespace directories - for ((spcnode, dbnode), has_relmap_file) in self - .timeline - .list_dbdirs(self.lsn, self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))? + for ((spcnode, dbnode), has_relmap_file) in + self.timeline.list_dbdirs(self.lsn, self.ctx).await? { self.add_dbdir(spcnode, dbnode, has_relmap_file).await?; @@ -362,8 +378,7 @@ where let rels = self .timeline .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; for &rel in rels.iter() { // Send init fork as main fork to provide well formed empty // contents of UNLOGGED relations. Postgres copies it in @@ -391,8 +406,7 @@ where let aux_files = self .timeline .list_aux_files(self.lsn, self.ctx, self.io_concurrency.clone()) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; let aux_scan_time = start_time.elapsed(); let aux_estimated_size = aux_files .values() @@ -451,16 +465,14 @@ where for xid in self .timeline .list_twophase_files(self.lsn, self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))? 
+ .await? { self.add_twophase_file(xid).await?; } let repl_origins = self .timeline .get_replorigins(self.lsn, self.ctx, self.io_concurrency.clone()) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; let n_origins = repl_origins.len(); if n_origins != 0 { // @@ -505,8 +517,7 @@ where let nblocks = self .timeline .get_rel_size(src, Version::Lsn(self.lsn), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; // If the relation is empty, create an empty file if nblocks == 0 { @@ -532,8 +543,7 @@ where // TODO: investigate using get_vectored for the entire startblk..endblk range. // But this code path is not on the critical path for most basebackups (?). .get(rel_block_to_key(src, blknum), self.lsn, self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; segment_data.extend_from_slice(&img[..]); } @@ -567,8 +577,7 @@ where let img = self .timeline .get_relmap_file(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; if img.len() != dispatch_pgversion!(self.timeline.pg_version, pgv::bindings::SIZEOF_RELMAPFILE) @@ -622,8 +631,7 @@ where && self .timeline .list_rels(spcnode, dbnode, Version::Lsn(self.lsn), self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))? + .await? .is_empty() { return Ok(()); @@ -674,8 +682,7 @@ where let img = self .timeline .get_twophase_file(xid, self.lsn, self.ctx) - .await - .map_err(|e| BasebackupError::Server(e.into()))?; + .await?; let mut buf = BytesMut::new(); buf.extend_from_slice(&img[..]); diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 603a5f65aa..ba2ed9dc81 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -2113,6 +2113,7 @@ impl PageServerHandler { // TODO: passthrough the error site to the final error message? 
BasebackupError::Client(e, _) => QueryError::Disconnected(ConnectionError::Io(e)), BasebackupError::Server(e) => QueryError::Other(e), + BasebackupError::Shutdown => QueryError::Shutdown, } } From a07599949fdcf7fdd1e396b9bb53b667a2f34948 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Mon, 3 Mar 2025 16:25:48 +0100 Subject: [PATCH 071/207] First version of a new benchmark to test larger OLTP workload (#11053) ## Problem We want to support larger tenants (regarding logical database size, number of transactions per second etc.) and should increase our test coverage of OLTP transactions at larger scale. ## Summary of changes Start a new benchmark that over time will add more OLTP tests at larger scale. This PR covers the first version and will be extended in further PRs. Also fix some infrastructure: - default for new connections and large tenants is to use connection pooler pgbouncer, however our fixture always added `statement_timeout=120` which is not compatible with pooler [see](https://neon.tech/docs/connect/connection-errors#unsupported-startup-parameter) - action to create branch timed out after 10 seconds and 10 retries but for large tenants it can take longer so use increasing back-off for retries ## Test run https://github.com/neondatabase/neon/actions/runs/13593446706 --- .github/actionlint.yml | 1 + .github/actions/neon-branch-create/action.yml | 12 +- .github/workflows/large_oltp_benchmark.yml | 147 ++++++++++++++++++ test_runner/fixtures/neon_fixtures.py | 7 +- .../large_synthetic_oltp/insert_webhooks.sql | 47 ++++++ .../select_any_webhook_with_skew.sql | 15 ++ .../select_recent_webhook.sql | 9 ++ .../test_perf_oltp_large_tenant.py | 90 +++++++++++ 8 files changed, 324 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/large_oltp_benchmark.yml create mode 100644 test_runner/performance/large_synthetic_oltp/insert_webhooks.sql create mode 100644 test_runner/performance/large_synthetic_oltp/select_any_webhook_with_skew.sql create mode 
100644 test_runner/performance/large_synthetic_oltp/select_recent_webhook.sql create mode 100644 test_runner/performance/test_perf_oltp_large_tenant.py diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 1e6c2d0aa2..667ff7f92e 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -32,3 +32,4 @@ config-variables: - NEON_DEV_AWS_ACCOUNT_ID - NEON_PROD_AWS_ACCOUNT_ID - AWS_ECR_REGION + - BENCHMARK_LARGE_OLTP_PROJECTID diff --git a/.github/actions/neon-branch-create/action.yml b/.github/actions/neon-branch-create/action.yml index 9f752d5a89..71dd6f3af2 100644 --- a/.github/actions/neon-branch-create/action.yml +++ b/.github/actions/neon-branch-create/action.yml @@ -84,7 +84,13 @@ runs: --header "Authorization: Bearer ${API_KEY}" ) - role_name=$(echo $roles | jq --raw-output '.roles[] | select(.protected == false) | .name') + role_name=$(echo "$roles" | jq --raw-output ' + (.roles | map(select(.protected == false))) as $roles | + if any($roles[]; .name == "neondb_owner") + then "neondb_owner" + else $roles[0].name + end + ') echo "role_name=${role_name}" >> $GITHUB_OUTPUT env: API_HOST: ${{ inputs.api_host }} @@ -107,13 +113,13 @@ runs: ) if [ -z "${reset_password}" ]; then - sleep 1 + sleep $i continue fi password=$(echo $reset_password | jq --raw-output '.role.password') if [ "${password}" == "null" ]; then - sleep 1 + sleep $i # increasing backoff continue fi diff --git a/.github/workflows/large_oltp_benchmark.yml b/.github/workflows/large_oltp_benchmark.yml new file mode 100644 index 0000000000..f33e11cd08 --- /dev/null +++ b/.github/workflows/large_oltp_benchmark.yml @@ -0,0 +1,147 @@ +name: large oltp benchmark + +on: + # uncomment to run on push for debugging your PR + push: + branches: [ bodobolero/synthetic_oltp_workload ] + + schedule: + # * is a special character in YAML so you have to quote this string + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ 
┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + - cron: '0 15 * * *' # run once a day, timezone is utc, avoid conflict with other benchmarks + workflow_dispatch: # adds ability to run this manually + +defaults: + run: + shell: bash -euxo pipefail {0} + +concurrency: + # Allow only one workflow globally because we need dedicated resources which only exist once + group: large-oltp-bench-workflow + cancel-in-progress: true + +jobs: + oltp: + strategy: + fail-fast: false # allow other variants to continue even if one fails + matrix: + include: + - target: new_branch + custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 + - target: reuse_branch + custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 + max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results + permissions: + contents: write + statuses: write + id-token: write # aws-actions/configure-aws-credentials + env: + TEST_PG_BENCH_DURATIONS_MATRIX: "1h" # todo update to > 1 h + TEST_PGBENCH_CUSTOM_SCRIPTS: ${{ matrix.custom_scripts }} + POSTGRES_DISTRIB_DIR: /tmp/neon/pg_install + PG_VERSION: 16 # pre-determined by pre-determined project + TEST_OUTPUT: /tmp/test_output + BUILD_TYPE: remote + SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }} + PLATFORM: ${{ matrix.target }} + + runs-on: [ self-hosted, us-east-2, x64 ] + container: + image: neondatabase/build-tools:pinned-bookworm + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + options: --init + + # Increase timeout to 8h, default timeout is 6h + timeout-minutes: 480 + + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials # necessary to download artefacts + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ 
vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours is currently max associated with IAM role + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact + path: /tmp/neon/ + prefix: latest + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + + - name: Create Neon Branch for large tenant + if: ${{ matrix.target == 'new_branch' }} + id: create-neon-branch-oltp-target + uses: ./.github/actions/neon-branch-create + with: + project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }} + api_key: ${{ secrets.NEON_STAGING_API_KEY }} + + - name: Set up Connection String + id: set-up-connstr + run: | + case "${{ matrix.target }}" in + new_branch) + CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} + ;; + reuse_branch) + CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} + ;; + *) + echo >&2 "Unknown target=${{ matrix.target }}" + exit 1 + ;; + esac + + echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + + - name: Benchmark pgbench with custom-scripts + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: ${{ env.SAVE_PERF_REPORT }} + extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant + pg_version: ${{ env.PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + + - name: Delete Neon Branch for large tenant + if: ${{ always() && matrix.target == 'new_branch' }} + uses: ./.github/actions/neon-branch-delete + with: + project_id: ${{ vars.BENCHMARK_LARGE_OLTP_PROJECTID }} + branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }} + api_key: ${{ secrets.NEON_STAGING_API_KEY }} + + - name: Create 
Allure report + id: create-allure-report + if: ${{ !cancelled() }} + uses: ./.github/actions/allure-report-generate + with: + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + + - name: Post to a Slack channel + if: ${{ github.event.schedule && failure() }} + uses: slackapi/slack-github-action@v1 + with: + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream + slack-message: | + Periodic large oltp perf testing: ${{ job.status }} + <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> + <${{ steps.create-allure-report.outputs.report-url }}|Allure report> + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 53df10be49..3aa018e99e 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -253,10 +253,15 @@ class PgProtocol: # enough for our tests, but if you need a longer, you can # change it by calling "SET statement_timeout" after # connecting. 
+ # pooler does not support statement_timeout + # Check if the hostname contains the string 'pooler' + hostname = result.get("host", "") + log.info(f"Hostname: {hostname}") options = result.get("options", "") - if "statement_timeout" not in options: + if "statement_timeout" not in options and "pooler" not in hostname: options = f"-cstatement_timeout=120s {options}" result["options"] = options + return result # autocommit=True here by default because that's what we need most of the time diff --git a/test_runner/performance/large_synthetic_oltp/insert_webhooks.sql b/test_runner/performance/large_synthetic_oltp/insert_webhooks.sql new file mode 100644 index 0000000000..69e6366a53 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/insert_webhooks.sql @@ -0,0 +1,47 @@ +\set event_type random(1,10) +\set service_key random(1, 3) + +INSERT INTO webhook.incoming_webhooks ( + created_at, + delivery_id, + upstream_emitted_at, + service_key, + event_id, + source, + body, + json, + additional_data, + is_body_encrypted, + event_type +) VALUES ( + now(), + gen_random_uuid(), + now() - interval '10 minutes', + CASE :service_key::int + WHEN 1 THEN 'shopify' + WHEN 2 THEN 'stripe' + WHEN 3 THEN 'github' + END, + 'evt_' || gen_random_uuid(), -- Ensures uniqueness + CASE :service_key::int + WHEN 1 THEN 'Shopify' + WHEN 2 THEN 'Stripe' + WHEN 3 THEN 'GitHub' + END, + '{"order_id": 987654, "customer": {"name": "John Doe", "email": "john.doe@example.com"}, "items": [{"product_id": 12345, "quantity": 2}, {"product_id": 67890, "quantity": 1}], "total": 199.99}', + '{"order_id": 987654, "customer": {"name": "John Doe", "email": "john.doe@example.com"}, "items": [{"product_id": 12345, "quantity": 2}, {"product_id": 67890, "quantity": 1}], "total": 199.99}'::jsonb, + '{"metadata": {"user_agent": "Mozilla/5.0", "ip_address": "203.0.113.42"}}'::jsonb, + false, + CASE :event_type::int + WHEN 1 THEN 'ORDER_PLACED' + WHEN 2 THEN 'ORDER_CANCELLED' + WHEN 3 THEN 'PAYMENT_SUCCESSFUL' + 
WHEN 4 THEN 'PAYMENT_FAILED' + WHEN 5 THEN 'CUSTOMER_CREATED' + WHEN 6 THEN 'CUSTOMER_UPDATED' + WHEN 7 THEN 'PRODUCT_UPDATED' + WHEN 8 THEN 'INVENTORY_LOW' + WHEN 9 THEN 'SHIPPING_DISPATCHED' + WHEN 10 THEN 'REFUND_ISSUED' + END +); \ No newline at end of file diff --git a/test_runner/performance/large_synthetic_oltp/select_any_webhook_with_skew.sql b/test_runner/performance/large_synthetic_oltp/select_any_webhook_with_skew.sql new file mode 100644 index 0000000000..b2f173f011 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/select_any_webhook_with_skew.sql @@ -0,0 +1,15 @@ +-- Zipfian distributions model real-world access patterns where: +-- A few values (popular IDs) are accessed frequently. +-- Many values are accessed rarely. +-- This is useful for simulating realistic workloads, like webhook processing where recent events are more frequently accessed. + +\set alpha 1.2 +\set min_id 1 +\set max_id 135000000 + +\set zipf_random_id random_zipfian(:min_id, :max_id, :alpha) + +SELECT * +FROM webhook.incoming_webhooks +WHERE id = (:zipf_random_id)::bigint +LIMIT 1; \ No newline at end of file diff --git a/test_runner/performance/large_synthetic_oltp/select_recent_webhook.sql b/test_runner/performance/large_synthetic_oltp/select_recent_webhook.sql new file mode 100644 index 0000000000..78a843bf0f --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/select_recent_webhook.sql @@ -0,0 +1,9 @@ +-- select one of the most recent webhook records (created in the branch timeline during the bench run) +SELECT * +FROM webhook.incoming_webhooks +WHERE id = ( + SELECT (floor(random() * ( + (SELECT last_value FROM webhook.incoming_webhooks_id_seq) - 1350000001 + 1 + ) + 1350000001))::bigint +) +LIMIT 1; \ No newline at end of file diff --git a/test_runner/performance/test_perf_oltp_large_tenant.py b/test_runner/performance/test_perf_oltp_large_tenant.py new file mode 100644 index 0000000000..ae00dbb3b5 --- /dev/null +++ 
b/test_runner/performance/test_perf_oltp_large_tenant.py @@ -0,0 +1,90 @@ +from __future__ import annotations + +import os +import timeit +from pathlib import Path + +import pytest +from fixtures.benchmark_fixture import PgBenchRunResult +from fixtures.compare_fixtures import PgCompare + +from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp + + +def get_custom_scripts( + default: str = "insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4", +) -> list[str]: + # We parametrize each run with the custom scripts to run and their weights. + # The custom scripts and their weights are passed through TEST_PGBENCH_CUSTOM_SCRIPTS env variable. + # Delimit the custom scripts for one run by spaces and for different runs by commas, for example: + # "insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4,insert_webhooks.sql@8 select_any_webhook_with_skew.sql@2" + # Databases/branches are pre-created and passed through BENCHMARK_CONNSTR env variable. 
+ scripts = os.getenv("TEST_PGBENCH_CUSTOM_SCRIPTS", default=str(default)) + rv = [] + for s in scripts.split(","): + rv.append(s) + return rv + + +def run_test_pgbench(env: PgCompare, custom_scripts: str, duration: int): + password = env.pg.default_options.get("password", None) + options = env.pg.default_options.get("options", "") + # drop password from the connection string by passing password=None and set password separately + connstr = env.pg.connstr(password=None, options=options) + # if connstr does not contain pooler we can set statement_timeout to 0 + if "pooler" not in connstr: + options = "-cstatement_timeout=0 " + env.pg.default_options.get("options", "") + connstr = env.pg.connstr(password=None, options=options) + + script_args = [ + "pgbench", + "-n", # no explicit vacuum before the test - we want to rely on auto-vacuum + "-M", + "prepared", + "--client=500", + "--jobs=100", + f"-T{duration}", + "-P60", # progress every minute + "--progress-timestamp", + ] + for script in custom_scripts.split(): + script_args.extend(["-f", f"test_runner/performance/large_synthetic_oltp/{script}"]) + script_args.append(connstr) + + run_pgbench( + env, + "custom-scripts", + script_args, + password=password, + ) + + +def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None): + environ: dict[str, str] = {} + if password is not None: + environ["PGPASSWORD"] = password + + run_start_timestamp = utc_now_timestamp() + t0 = timeit.default_timer() + out = env.pg_bin.run_capture(cmdline, env=environ) + run_duration = timeit.default_timer() - t0 + run_end_timestamp = utc_now_timestamp() + env.flush() + + stdout = Path(f"{out}.stdout").read_text() + + res = PgBenchRunResult.parse_from_stdout( + stdout=stdout, + run_duration=run_duration, + run_start_timestamp=run_start_timestamp, + run_end_timestamp=run_end_timestamp, + ) + env.zenbenchmark.record_pg_bench_result(prefix, res) + + +@pytest.mark.parametrize("custom_scripts", get_custom_scripts()) 
+@pytest.mark.parametrize("duration", get_durations_matrix()) +@pytest.mark.remote_cluster +def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int): + run_test_pgbench(remote_compare, custom_scripts, duration) + # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration From b953daa21fce8b41e4c4556da06279223f8f411a Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 3 Mar 2025 16:03:51 +0000 Subject: [PATCH 072/207] safekeeper: allow remote deletion to proceed after dropped requests (#11042) ## Problem If a caller times out on safekeeper timeline deletion on a large timeline, and waits a while before retrying, the deletion will not progress while the retry is waiting. The net effect is very very slow deletion as it only proceeds in 30 second bursts across 5 minute idle periods. Related: https://github.com/neondatabase/neon/issues/10265 ## Summary of changes - Run remote deletion in a background task - Carry a watch::Receiver on the Timeline for other callers to join the wait - Restart deletion if the API is called again and the previous attempt failed --- safekeeper/src/timeline.rs | 94 ++++- safekeeper/src/wal_backup.rs | 8 +- test_runner/fixtures/remote_storage.py | 11 + test_runner/fixtures/safekeeper/http.py | 3 +- test_runner/fixtures/safekeeper_utils.py | 92 +++++ .../regress/test_safekeeper_deletion.py | 331 ++++++++++++++++++ test_runner/regress/test_wal_acceptor.py | 307 +--------------- 7 files changed, 541 insertions(+), 305 deletions(-) create mode 100644 test_runner/fixtures/safekeeper_utils.py create mode 100644 test_runner/regress/test_safekeeper_deletion.py diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 930f66a207..d3c841ec09 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -415,6 +415,9 @@ impl From for ApiError { } } +/// We run remote deletion in a background task, this is how it sends its results back. 
+type RemoteDeletionReceiver = tokio::sync::watch::Receiver>>; + /// Timeline struct manages lifecycle (creation, deletion, restore) of a safekeeper timeline. /// It also holds SharedState and provides mutually exclusive access to it. pub struct Timeline { @@ -446,6 +449,8 @@ pub struct Timeline { manager_ctl: ManagerCtl, conf: Arc, + remote_deletion: std::sync::Mutex>, + /// Hold this gate from code that depends on the Timeline's non-shut-down state. While holding /// this gate, you must respect [`Timeline::cancel`] pub(crate) gate: Gate, @@ -494,6 +499,7 @@ impl Timeline { walreceivers, gate: Default::default(), cancel: CancellationToken::default(), + remote_deletion: std::sync::Mutex::new(None), manager_ctl: ManagerCtl::new(), conf, broker_active: AtomicBool::new(false), @@ -598,15 +604,95 @@ impl Timeline { shared_state.sk.close_wal_store(); if !only_local && self.conf.is_wal_backup_enabled() { - // Note: we concurrently delete remote storage data from multiple - // safekeepers. That's ok, s3 replies 200 if object doesn't exist and we - // do some retries anyway. - wal_backup::delete_timeline(&self.ttid).await?; + self.remote_delete().await?; } let dir_existed = delete_dir(&self.timeline_dir).await?; Ok(dir_existed) } + /// Delete timeline content from remote storage. If the returned future is dropped, + /// deletion will continue in the background. + /// + /// This function ordinarily spawns a task and stashes a result receiver into [`Self::remote_deletion`]. If + /// deletion is already happening, it may simply wait for an existing task's result. + /// + /// Note: we concurrently delete remote storage data from multiple + /// safekeepers. That's ok, s3 replies 200 if object doesn't exist and we + /// do some retries anyway. + async fn remote_delete(&self) -> Result<()> { + // We will start a background task to do the deletion, so that it proceeds even if our + // API request is dropped. 
Future requests will see the existing deletion task and wait + // for it to complete. + let mut result_rx = { + let mut remote_deletion_state = self.remote_deletion.lock().unwrap(); + let result_rx = if let Some(result_rx) = remote_deletion_state.as_ref() { + if let Some(result) = result_rx.borrow().as_ref() { + if let Err(e) = result { + // A previous remote deletion failed: we will start a new one + tracing::error!("remote deletion failed, will retry ({e})"); + None + } else { + // A previous remote deletion call already succeeded + return Ok(()); + } + } else { + // Remote deletion is still in flight + Some(result_rx.clone()) + } + } else { + // Remote deletion was not attempted yet, start it now. + None + }; + + match result_rx { + Some(result_rx) => result_rx, + None => self.start_remote_delete(&mut remote_deletion_state), + } + }; + + // Wait for a result + let Ok(result) = result_rx.wait_for(|v| v.is_some()).await else { + // Unexpected: sender should always send a result before dropping the channel, even if it has an error + return Err(anyhow::anyhow!( + "remote deletion task future was dropped without sending a result" + )); + }; + + result + .as_ref() + .expect("We did a wait_for on this being Some above") + .as_ref() + .map(|_| ()) + .map_err(|e| anyhow::anyhow!("remote deletion failed: {e}")) + } + + /// Spawn background task to do remote deletion, return a receiver for its outcome + fn start_remote_delete( + &self, + guard: &mut std::sync::MutexGuard>, + ) -> RemoteDeletionReceiver { + tracing::info!("starting remote deletion"); + let (result_tx, result_rx) = tokio::sync::watch::channel(None); + let ttid = self.ttid; + tokio::task::spawn( + async move { + let r = wal_backup::delete_timeline(&ttid).await; + if let Err(e) = &r { + // Log error here in case nobody ever listens for our result (e.g. dropped API request) + tracing::error!("remote deletion failed: {e}"); + } + + // Ignore send results: it's legal for the Timeline to give up waiting for us. 
+ let _ = result_tx.send(Some(r)); + } + .instrument(info_span!("remote_delete", timeline = %self.ttid)), + ); + + **guard = Some(result_rx.clone()); + + result_rx + } + /// Returns if timeline is cancelled. pub fn is_cancelled(&self) -> bool { self.cancel.is_cancelled() diff --git a/safekeeper/src/wal_backup.rs b/safekeeper/src/wal_backup.rs index 6176e64698..56f4a2faf9 100644 --- a/safekeeper/src/wal_backup.rs +++ b/safekeeper/src/wal_backup.rs @@ -21,9 +21,9 @@ use tokio::sync::{OnceCell, watch}; use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::backoff; use utils::id::{NodeId, TenantTimelineId}; use utils::lsn::Lsn; +use utils::{backoff, pausable_failpoint}; use crate::metrics::{BACKED_UP_SEGMENTS, BACKUP_ERRORS, WAL_BACKUP_TASKS}; use crate::timeline::WalResidentTimeline; @@ -564,6 +564,12 @@ pub async fn delete_timeline(ttid: &TenantTimelineId) -> Result<()> { // We don't currently have http requests timeout cancellation, but if/once // we have listing should get streaming interface to make progress. 
+ pausable_failpoint!("sk-delete-timeline-remote-pause"); + + fail::fail_point!("sk-delete-timeline-remote", |_| { + Err(anyhow::anyhow!("failpoint: sk-delete-timeline-remote")) + }); + let cancel = CancellationToken::new(); // not really used backoff::retry( || async { diff --git a/test_runner/fixtures/remote_storage.py b/test_runner/fixtures/remote_storage.py index 4df2b2df2b..cac84c07e7 100644 --- a/test_runner/fixtures/remote_storage.py +++ b/test_runner/fixtures/remote_storage.py @@ -282,6 +282,17 @@ class S3Storage: def timeline_path(self, tenant_id: TenantShardId | TenantId, timeline_id: TimelineId) -> str: return f"{self.tenant_path(tenant_id)}/timelines/{timeline_id}" + def safekeeper_tenants_path(self) -> str: + return f"{self.prefix_in_bucket}" + + def safekeeper_tenant_path(self, tenant_id: TenantShardId | TenantId) -> str: + return f"{self.safekeeper_tenants_path()}/{tenant_id}" + + def safekeeper_timeline_path( + self, tenant_id: TenantShardId | TenantId, timeline_id: TimelineId + ) -> str: + return f"{self.safekeeper_tenant_path(tenant_id)}/{timeline_id}" + def get_latest_generation_key(self, prefix: str, suffix: str, keys: list[str]) -> str: """ Gets the latest generation key from a list of keys. diff --git a/test_runner/fixtures/safekeeper/http.py b/test_runner/fixtures/safekeeper/http.py index 7038d87aba..e409151b76 100644 --- a/test_runner/fixtures/safekeeper/http.py +++ b/test_runner/fixtures/safekeeper/http.py @@ -229,13 +229,14 @@ class SafekeeperHttpClient(requests.Session, MetricsGetter): # only_local doesn't remove segments in the remote storage. 
def timeline_delete( - self, tenant_id: TenantId, timeline_id: TimelineId, only_local: bool = False + self, tenant_id: TenantId, timeline_id: TimelineId, only_local: bool = False, **kwargs ) -> dict[Any, Any]: res = self.delete( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}", params={ "only_local": str(only_local).lower(), }, + **kwargs, ) res.raise_for_status() res_json = res.json() diff --git a/test_runner/fixtures/safekeeper_utils.py b/test_runner/fixtures/safekeeper_utils.py new file mode 100644 index 0000000000..158baf7bb6 --- /dev/null +++ b/test_runner/fixtures/safekeeper_utils.py @@ -0,0 +1,92 @@ +from fixtures.common_types import Lsn, TenantId, TimelineId +from fixtures.log_helper import log +from fixtures.neon_fixtures import Endpoint, NeonPageserver, Safekeeper +from fixtures.pageserver.utils import wait_for_last_record_lsn, wait_for_upload +from fixtures.utils import get_dir_size + + +def is_segment_offloaded( + sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, seg_end: Lsn +): + http_cli = sk.http_client() + tli_status = http_cli.timeline_status(tenant_id, timeline_id) + log.info(f"sk status is {tli_status}") + return tli_status.backup_lsn >= seg_end + + +def is_flush_lsn_caught_up(sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, lsn: Lsn): + http_cli = sk.http_client() + tli_status = http_cli.timeline_status(tenant_id, timeline_id) + log.info(f"sk status is {tli_status}") + return tli_status.flush_lsn >= lsn + + +def is_wal_trimmed(sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, target_size_mb): + http_cli = sk.http_client() + tli_status = http_cli.timeline_status(tenant_id, timeline_id) + sk_wal_size = get_dir_size(sk.timeline_dir(tenant_id, timeline_id)) + sk_wal_size_mb = sk_wal_size / 1024 / 1024 + log.info(f"Safekeeper id={sk.id} wal_size={sk_wal_size_mb:.2f}MB status={tli_status}") + return sk_wal_size_mb <= target_size_mb + + +def wait_lsn_force_checkpoint( + tenant_id: 
TenantId, + timeline_id: TimelineId, + endpoint: Endpoint, + ps: NeonPageserver, + pageserver_conn_options=None, +): + pageserver_conn_options = pageserver_conn_options or {} + lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) + log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver") + + wait_lsn_force_checkpoint_at(lsn, tenant_id, timeline_id, ps, pageserver_conn_options) + + +def wait_lsn_force_checkpoint_at_sk( + safekeeper: Safekeeper, + tenant_id: TenantId, + timeline_id: TimelineId, + ps: NeonPageserver, + pageserver_conn_options=None, +): + sk_flush_lsn = safekeeper.get_flush_lsn(tenant_id, timeline_id) + wait_lsn_force_checkpoint_at(sk_flush_lsn, tenant_id, timeline_id, ps, pageserver_conn_options) + + +def wait_lsn_force_checkpoint_at( + lsn: Lsn, + tenant_id: TenantId, + timeline_id: TimelineId, + ps: NeonPageserver, + pageserver_conn_options=None, +): + """ + Wait until pageserver receives given lsn, force checkpoint and wait for + upload, i.e. remote_consistent_lsn advancement. 
+ """ + pageserver_conn_options = pageserver_conn_options or {} + + auth_token = None + if "password" in pageserver_conn_options: + auth_token = pageserver_conn_options["password"] + + # wait for the pageserver to catch up + wait_for_last_record_lsn( + ps.http_client(auth_token=auth_token), + tenant_id, + timeline_id, + lsn, + ) + + # force checkpoint to advance remote_consistent_lsn + ps.http_client(auth_token).timeline_checkpoint(tenant_id, timeline_id) + + # ensure that remote_consistent_lsn is advanced + wait_for_upload( + ps.http_client(auth_token=auth_token), + tenant_id, + timeline_id, + lsn, + ) diff --git a/test_runner/regress/test_safekeeper_deletion.py b/test_runner/regress/test_safekeeper_deletion.py new file mode 100644 index 0000000000..b46095d583 --- /dev/null +++ b/test_runner/regress/test_safekeeper_deletion.py @@ -0,0 +1,331 @@ +from __future__ import annotations + +import threading +import time +from contextlib import closing +from enum import StrEnum + +import pytest +import requests +from fixtures.common_types import Lsn, TimelineId +from fixtures.log_helper import log +from fixtures.neon_fixtures import ( + Endpoint, + NeonEnvBuilder, +) +from fixtures.remote_storage import S3Storage, s3_storage +from fixtures.safekeeper_utils import is_segment_offloaded +from fixtures.utils import wait_until + + +@pytest.mark.parametrize("auth_enabled", [False, True]) +def test_safekeeper_delete_timeline(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): + neon_env_builder.auth_enabled = auth_enabled + env = neon_env_builder.init_start() + + # FIXME: are these expected? + env.pageserver.allowed_errors.extend( + [ + ".*Timeline .* was not found in global map.*", + ".*Timeline .* was cancelled and cannot be used anymore.*", + ] + ) + + # Create two tenants: one will be deleted, other should be preserved. 
+ tenant_id = env.initial_tenant + timeline_id_1 = env.create_branch("br1") # Active, delete explicitly + timeline_id_2 = env.create_branch("br2") # Inactive, delete explicitly + timeline_id_3 = env.create_branch("br3") # Active, delete with the tenant + timeline_id_4 = env.create_branch("br4") # Inactive, delete with the tenant + + tenant_id_other, timeline_id_other = env.create_tenant() + + # Populate branches + endpoint_1 = env.endpoints.create_start("br1") + endpoint_2 = env.endpoints.create_start("br2") + endpoint_3 = env.endpoints.create_start("br3") + endpoint_4 = env.endpoints.create_start("br4") + endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other) + for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]: + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE t(key int primary key)") + sk = env.safekeepers[0] + sk_data_dir = sk.data_dir + if not auth_enabled: + sk_http = sk.http_client() + sk_http_other = sk_http + else: + sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + sk_http_other = sk.http_client( + auth_token=env.auth_keys.generate_tenant_token(tenant_id_other) + ) + sk_http_noauth = sk.http_client(gen_sk_wide_token=False) + assert (sk_data_dir / str(tenant_id) / str(timeline_id_1)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Stop branches which should be inactive and restart Safekeeper to drop its in-memory state. 
+ endpoint_2.stop_and_destroy() + endpoint_4.stop_and_destroy() + sk.stop() + sk.start() + + # Ensure connections to Safekeeper are established + for endpoint in [endpoint_1, endpoint_3, endpoint_other]: + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("INSERT INTO t (key) VALUES (1)") + + # Stop all computes gracefully before safekeepers stop responding to them + endpoint_1.stop_and_destroy() + endpoint_3.stop_and_destroy() + + # Remove initial tenant's br1 (active) + assert sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"] + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Ensure repeated deletion succeeds + assert not sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"] + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + if auth_enabled: + # Ensure we cannot delete the other tenant + for sk_h in [sk_http, sk_http_noauth]: + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): + assert sk_h.timeline_delete(tenant_id_other, timeline_id_other) + with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): + assert sk_h.tenant_delete_force(tenant_id_other) + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Remove initial tenant's br2 (inactive) + assert sk_http.timeline_delete(tenant_id, timeline_id_2)["dir_existed"] + 
assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Remove non-existing branch, should succeed + assert not sk_http.timeline_delete(tenant_id, TimelineId("00" * 16))["dir_existed"] + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).exists() + assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Remove initial tenant fully (two branches are active) + response = sk_http.tenant_delete_force(tenant_id) + assert response[str(timeline_id_3)]["dir_existed"] + assert not (sk_data_dir / str(tenant_id)).exists() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Remove initial tenant again. + response = sk_http.tenant_delete_force(tenant_id) + # assert response == {} + assert not (sk_data_dir / str(tenant_id)).exists() + assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() + + # Ensure the other tenant still works + sk_http_other.timeline_status(tenant_id_other, timeline_id_other) + with closing(endpoint_other.connect()) as conn: + with conn.cursor() as cur: + cur.execute("INSERT INTO t (key) VALUES (123)") + + +def test_safekeeper_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder): + """ + Test deleting timelines on a safekeeper while they're under load. 
+ + This should not happen under normal operation, but it can happen if + there is some rogue compute/pageserver that is writing/reading to a + safekeeper that we're migrating a timeline away from, or if the timeline + is being deleted while such a rogue client is running. + """ + neon_env_builder.auth_enabled = True + env = neon_env_builder.init_start() + + # Create two endpoints that will generate load + timeline_id_a = env.create_branch("deleteme_a") + timeline_id_b = env.create_branch("deleteme_b") + + endpoint_a = env.endpoints.create("deleteme_a") + endpoint_a.start() + endpoint_b = env.endpoints.create("deleteme_b") + endpoint_b.start() + + # Get tenant and timeline IDs + tenant_id = env.initial_tenant + + # Start generating load on both timelines + def generate_load(endpoint: Endpoint): + with closing(endpoint.connect()) as conn: + with conn.cursor() as cur: + cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") + while True: + try: + cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'data'") + except: # noqa + # Ignore errors since timeline may be deleted + break + + t_a = threading.Thread(target=generate_load, args=(endpoint_a,)) + t_b = threading.Thread(target=generate_load, args=(endpoint_b,)) + try: + t_a.start() + t_b.start() + + # Let the load run for a bit + log.info("Warming up...") + time.sleep(2) + + # Safekeeper errors will propagate to the pageserver: it is correct that these are + # logged at error severity because they indicate the pageserver is trying to read + # a timeline that it shouldn't. 
+ env.pageserver.allowed_errors.extend( + [ + ".*Timeline.*was cancelled.*", + ".*Timeline.*was not found.*", + ] + ) + + # Try deleting timelines while under load + sk = env.safekeepers[0] + sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) + + # Delete first timeline + log.info(f"Deleting {timeline_id_a}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_a, only_local=True)["dir_existed"] + + # Delete second timeline + log.info(f"Deleting {timeline_id_b}...") + assert sk_http.timeline_delete(tenant_id, timeline_id_b, only_local=True)["dir_existed"] + + # Verify timelines are gone from disk + sk_data_dir = sk.data_dir + assert not (sk_data_dir / str(tenant_id) / str(timeline_id_a)).exists() + # assert not (sk_data_dir / str(tenant_id) / str(timeline_id_b)).exists() + + finally: + log.info("Stopping endpoints...") + # Stop endpoints with immediate mode because we deleted the timeline out from under the compute, which may cause it to hang + endpoint_a.stop(mode="immediate") + endpoint_b.stop(mode="immediate") + log.info("Joining threads...") + t_a.join() + t_b.join() + + +class RemoteDeleteFailpoint(StrEnum): + PAUSE = "sk-delete-timeline-remote-pause" + FAIL = "sk-delete-timeline-remote" + + +@pytest.mark.parametrize("failpoint", [RemoteDeleteFailpoint.PAUSE, RemoteDeleteFailpoint.FAIL]) +def test_safekeeper_delete_remote_errors( + neon_env_builder: NeonEnvBuilder, failpoint: RemoteDeleteFailpoint +): + """ + Test that errors and delays during remote deletion are handled correctly. 
+ """ + + # Configure safekeepers with ultra-fast eviction policy + neon_env_builder.safekeeper_extra_opts = [ + "--enable-offload", + "--delete-offloaded-wal", + "--control-file-save-interval", + "1s", + ] + neon_env_builder.enable_safekeeper_remote_storage(s3_storage()) + env = neon_env_builder.init_start() + + # FIXME: pageserver is intermittently emitting this + env.pageserver.allowed_errors.extend( + [ + ".*unsupported command START_WAL_PUSH in START_WAL_PUSH.*", + ] + ) + + timeline_id_a = env.create_branch("deleteme_a") + endpoint_a = env.endpoints.create("deleteme_a") + endpoint_a.start() + with closing(endpoint_a.connect()) as conn: + with conn.cursor() as cur: + # roughly fills one segment + cur.execute("create table t(key int, value text)") + cur.execute("insert into t select generate_series(1,250000), 'payload'") + endpoint_a.stop() + + # Ensure something is uploaded to remote storage + def assert_is_uploaded(): + assert is_segment_offloaded( + env.safekeepers[0], env.initial_tenant, timeline_id_a, Lsn("0/2000000") + ) + + wait_until(assert_is_uploaded) + + def list_timeline_remote(): + assert isinstance(env.safekeepers_remote_storage, S3Storage) + prefix = f"{env.safekeepers_remote_storage.safekeeper_timeline_path(env.initial_tenant, timeline_id_a)}/" + + listing = env.safekeepers_remote_storage.client.list_objects_v2( + Bucket=env.safekeepers_remote_storage.bucket_name, + Prefix=prefix, + ) + return listing.get("Contents", []) + + assert list_timeline_remote() != [] + + sk_http = env.safekeepers[0].http_client() + env.pageserver.http_client().timeline_delete(env.initial_tenant, timeline_id_a) + + # Set up failpoint + if failpoint == RemoteDeleteFailpoint.PAUSE: + sk_http.configure_failpoints((failpoint, "pause")) + elif failpoint == RemoteDeleteFailpoint.FAIL: + sk_http.configure_failpoints((failpoint, "return")) + else: + raise NotImplementedError(f"Unknown failpoint: {failpoint}") + + # Delete the timeline - this should hit the configured failpoint 
+ if failpoint == RemoteDeleteFailpoint.PAUSE: + # Expect time out + with pytest.raises(requests.exceptions.ReadTimeout, match="timed out"): + sk_http.timeline_delete(env.initial_tenant, timeline_id_a, timeout=5) + + # Assert deletion didn't happen yet + assert list_timeline_remote() != [] + + # Unblock the background task that should still be running + sk_http.configure_failpoints((failpoint, "off")) + + # Expect that after unblocking, remote deletion proceeds + def assert_remote_deleted(): + assert list_timeline_remote() == [] + + wait_until(assert_remote_deleted) + + elif failpoint == RemoteDeleteFailpoint.FAIL: + # Expect immediate failure + with pytest.raises(sk_http.HTTPError, match="Internal Server Error"): + sk_http.timeline_delete(env.initial_tenant, timeline_id_a) + + sk_http.configure_failpoints((failpoint, "off")) + else: + raise NotImplementedError(f"Unknown failpoint: {failpoint}") + + # Retry should succeed + sk_http.timeline_delete(env.initial_tenant, timeline_id_a) + + # Remote storage should be empty + assert list_timeline_remote() == [] diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 8f70b460c6..0366e88389 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -27,7 +27,6 @@ from fixtures.metrics import parse_metrics from fixtures.neon_fixtures import ( Endpoint, NeonEnvBuilder, - NeonPageserver, PgBin, PgProtocol, Safekeeper, @@ -38,8 +37,6 @@ from fixtures.pageserver.utils import ( assert_prefix_empty, assert_prefix_not_empty, timeline_delete_wait_completed, - wait_for_last_record_lsn, - wait_for_upload, ) from fixtures.pg_version import PgVersion from fixtures.port_distributor import PortDistributor @@ -55,9 +52,16 @@ from fixtures.safekeeper.http import ( TimelineCreateRequest, ) from fixtures.safekeeper.utils import wait_walreceivers_absent +from fixtures.safekeeper_utils import ( + is_flush_lsn_caught_up, + is_segment_offloaded, + 
is_wal_trimmed, + wait_lsn_force_checkpoint, + wait_lsn_force_checkpoint_at, + wait_lsn_force_checkpoint_at_sk, +) from fixtures.utils import ( PropagatingThread, - get_dir_size, query_scalar, run_only_on_default_postgres, skip_in_debug_build, @@ -69,68 +73,6 @@ if TYPE_CHECKING: from typing import Any, Self -def wait_lsn_force_checkpoint( - tenant_id: TenantId, - timeline_id: TimelineId, - endpoint: Endpoint, - ps: NeonPageserver, - pageserver_conn_options=None, -): - pageserver_conn_options = pageserver_conn_options or {} - lsn = Lsn(endpoint.safe_psql("SELECT pg_current_wal_flush_lsn()")[0][0]) - log.info(f"pg_current_wal_flush_lsn is {lsn}, waiting for it on pageserver") - - wait_lsn_force_checkpoint_at(lsn, tenant_id, timeline_id, ps, pageserver_conn_options) - - -def wait_lsn_force_checkpoint_at_sk( - safekeeper: Safekeeper, - tenant_id: TenantId, - timeline_id: TimelineId, - ps: NeonPageserver, - pageserver_conn_options=None, -): - sk_flush_lsn = safekeeper.get_flush_lsn(tenant_id, timeline_id) - wait_lsn_force_checkpoint_at(sk_flush_lsn, tenant_id, timeline_id, ps, pageserver_conn_options) - - -def wait_lsn_force_checkpoint_at( - lsn: Lsn, - tenant_id: TenantId, - timeline_id: TimelineId, - ps: NeonPageserver, - pageserver_conn_options=None, -): - """ - Wait until pageserver receives given lsn, force checkpoint and wait for - upload, i.e. remote_consistent_lsn advancement. 
- """ - pageserver_conn_options = pageserver_conn_options or {} - - auth_token = None - if "password" in pageserver_conn_options: - auth_token = pageserver_conn_options["password"] - - # wait for the pageserver to catch up - wait_for_last_record_lsn( - ps.http_client(auth_token=auth_token), - tenant_id, - timeline_id, - lsn, - ) - - # force checkpoint to advance remote_consistent_lsn - ps.http_client(auth_token).timeline_checkpoint(tenant_id, timeline_id) - - # ensure that remote_consistent_lsn is advanced - wait_for_upload( - ps.http_client(auth_token=auth_token), - tenant_id, - timeline_id, - lsn, - ) - - @dataclass class TimelineMetrics: timeline_id: TimelineId @@ -475,31 +417,6 @@ def wait(f, desc, timeout=30, wait_f=None): wait_f() -def is_segment_offloaded( - sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, seg_end: Lsn -): - http_cli = sk.http_client() - tli_status = http_cli.timeline_status(tenant_id, timeline_id) - log.info(f"sk status is {tli_status}") - return tli_status.backup_lsn >= seg_end - - -def is_flush_lsn_caught_up(sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, lsn: Lsn): - http_cli = sk.http_client() - tli_status = http_cli.timeline_status(tenant_id, timeline_id) - log.info(f"sk status is {tli_status}") - return tli_status.flush_lsn >= lsn - - -def is_wal_trimmed(sk: Safekeeper, tenant_id: TenantId, timeline_id: TimelineId, target_size_mb): - http_cli = sk.http_client() - tli_status = http_cli.timeline_status(tenant_id, timeline_id) - sk_wal_size = get_dir_size(sk.timeline_dir(tenant_id, timeline_id)) - sk_wal_size_mb = sk_wal_size / 1024 / 1024 - log.info(f"Safekeeper id={sk.id} wal_size={sk_wal_size_mb:.2f}MB status={tli_status}") - return sk_wal_size_mb <= target_size_mb - - def test_wal_backup(neon_env_builder: NeonEnvBuilder): neon_env_builder.num_safekeepers = 3 remote_storage_kind = s3_storage() @@ -1685,214 +1602,6 @@ def test_replace_safekeeper(neon_env_builder: NeonEnvBuilder): 
show_statuses(env.safekeepers, tenant_id, timeline_id) -@pytest.mark.parametrize("auth_enabled", [False, True]) -def test_delete(neon_env_builder: NeonEnvBuilder, auth_enabled: bool): - neon_env_builder.auth_enabled = auth_enabled - env = neon_env_builder.init_start() - - # FIXME: are these expected? - env.pageserver.allowed_errors.extend( - [ - ".*Timeline .* was not found in global map.*", - ".*Timeline .* was cancelled and cannot be used anymore.*", - ] - ) - - # Create two tenants: one will be deleted, other should be preserved. - tenant_id = env.initial_tenant - timeline_id_1 = env.create_branch("br1") # Active, delete explicitly - timeline_id_2 = env.create_branch("br2") # Inactive, delete explicitly - timeline_id_3 = env.create_branch("br3") # Active, delete with the tenant - timeline_id_4 = env.create_branch("br4") # Inactive, delete with the tenant - - tenant_id_other, timeline_id_other = env.create_tenant() - - # Populate branches - endpoint_1 = env.endpoints.create_start("br1") - endpoint_2 = env.endpoints.create_start("br2") - endpoint_3 = env.endpoints.create_start("br3") - endpoint_4 = env.endpoints.create_start("br4") - endpoint_other = env.endpoints.create_start("main", tenant_id=tenant_id_other) - for endpoint in [endpoint_1, endpoint_2, endpoint_3, endpoint_4, endpoint_other]: - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("CREATE TABLE t(key int primary key)") - sk = env.safekeepers[0] - sk_data_dir = sk.data_dir - if not auth_enabled: - sk_http = sk.http_client() - sk_http_other = sk_http - else: - sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) - sk_http_other = sk.http_client( - auth_token=env.auth_keys.generate_tenant_token(tenant_id_other) - ) - sk_http_noauth = sk.http_client(gen_sk_wide_token=False) - assert (sk_data_dir / str(tenant_id) / str(timeline_id_1)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() - assert (sk_data_dir / 
str(tenant_id) / str(timeline_id_3)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Stop branches which should be inactive and restart Safekeeper to drop its in-memory state. - endpoint_2.stop_and_destroy() - endpoint_4.stop_and_destroy() - sk.stop() - sk.start() - - # Ensure connections to Safekeeper are established - for endpoint in [endpoint_1, endpoint_3, endpoint_other]: - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("INSERT INTO t (key) VALUES (1)") - - # Stop all computes gracefully before safekeepers stop responding to them - endpoint_1.stop_and_destroy() - endpoint_3.stop_and_destroy() - - # Remove initial tenant's br1 (active) - assert sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"] - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Ensure repeated deletion succeeds - assert not sk_http.timeline_delete(tenant_id, timeline_id_1)["dir_existed"] - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_2)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - if auth_enabled: - # Ensure we cannot delete the other tenant - for sk_h in [sk_http, sk_http_noauth]: - with pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): - assert sk_h.timeline_delete(tenant_id_other, timeline_id_other) - with 
pytest.raises(sk_h.HTTPError, match="Forbidden|Unauthorized"): - assert sk_h.tenant_delete_force(tenant_id_other) - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Remove initial tenant's br2 (inactive) - assert sk_http.timeline_delete(tenant_id, timeline_id_2)["dir_existed"] - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).is_dir() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Remove non-existing branch, should succeed - assert not sk_http.timeline_delete(tenant_id, TimelineId("00" * 16))["dir_existed"] - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_1)).exists() - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_2)).exists() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_3)).exists() - assert (sk_data_dir / str(tenant_id) / str(timeline_id_4)).is_dir() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Remove initial tenant fully (two branches are active) - response = sk_http.tenant_delete_force(tenant_id) - assert response[str(timeline_id_3)]["dir_existed"] - assert not (sk_data_dir / str(tenant_id)).exists() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Remove initial tenant again. 
- response = sk_http.tenant_delete_force(tenant_id) - # assert response == {} - assert not (sk_data_dir / str(tenant_id)).exists() - assert (sk_data_dir / str(tenant_id_other) / str(timeline_id_other)).is_dir() - - # Ensure the other tenant still works - sk_http_other.timeline_status(tenant_id_other, timeline_id_other) - with closing(endpoint_other.connect()) as conn: - with conn.cursor() as cur: - cur.execute("INSERT INTO t (key) VALUES (123)") - - -def test_delete_timeline_under_load(neon_env_builder: NeonEnvBuilder): - """ - Test deleting timelines on a safekeeper while they're under load. - - This should not happen under normal operation, but it can happen if - there is some rogue compute/pageserver that is writing/reading to a - safekeeper that we're migrating a timeline away from, or if the timeline - is being deleted while such a rogue client is running. - """ - neon_env_builder.auth_enabled = True - env = neon_env_builder.init_start() - - # Create two endpoints that will generate load - timeline_id_a = env.create_branch("deleteme_a") - timeline_id_b = env.create_branch("deleteme_b") - - endpoint_a = env.endpoints.create("deleteme_a") - endpoint_a.start() - endpoint_b = env.endpoints.create("deleteme_b") - endpoint_b.start() - - # Get tenant and timeline IDs - tenant_id = env.initial_tenant - - # Start generating load on both timelines - def generate_load(endpoint: Endpoint): - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - cur.execute("CREATE TABLE IF NOT EXISTS t(key int, value text)") - while True: - try: - cur.execute("INSERT INTO t SELECT generate_series(1,1000), 'data'") - except: # noqa - # Ignore errors since timeline may be deleted - break - - t_a = threading.Thread(target=generate_load, args=(endpoint_a,)) - t_b = threading.Thread(target=generate_load, args=(endpoint_b,)) - try: - t_a.start() - t_b.start() - - # Let the load run for a bit - log.info("Warming up...") - time.sleep(2) - - # Safekeeper errors will propagate to 
the pageserver: it is correct that these are - # logged at error severity because they indicate the pageserver is trying to read - # a timeline that it shouldn't. - env.pageserver.allowed_errors.extend( - [ - ".*Timeline.*was cancelled.*", - ".*Timeline.*was not found.*", - ] - ) - - # Try deleting timelines while under load - sk = env.safekeepers[0] - sk_http = sk.http_client(auth_token=env.auth_keys.generate_tenant_token(tenant_id)) - - # Delete first timeline - log.info(f"Deleting {timeline_id_a}...") - assert sk_http.timeline_delete(tenant_id, timeline_id_a, only_local=True)["dir_existed"] - - # Delete second timeline - log.info(f"Deleting {timeline_id_b}...") - assert sk_http.timeline_delete(tenant_id, timeline_id_b, only_local=True)["dir_existed"] - - # Verify timelines are gone from disk - sk_data_dir = sk.data_dir - assert not (sk_data_dir / str(tenant_id) / str(timeline_id_a)).exists() - # assert not (sk_data_dir / str(tenant_id) / str(timeline_id_b)).exists() - - finally: - log.info("Stopping endpoints...") - # Stop endpoints with immediate mode because we deleted the timeline out from under the compute, which may cause it to hang - endpoint_a.stop(mode="immediate") - endpoint_b.stop(mode="immediate") - log.info("Joining threads...") - t_a.join() - t_b.join() - - # Basic pull_timeline test. # When live_sk_change is False, compute is restarted to change set of # safekeepers; otherwise it is live reload. From 8298bc903c0148db187074374b85f6ae5c0f9347 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 3 Mar 2025 17:52:59 +0000 Subject: [PATCH 073/207] pageserver: handle in-memory layer overlaps with persistent layers (#11000) ## Problem Image layers may be nested inside in-memory layers as diagnosed [here](https://github.com/neondatabase/neon/issues/10720#issuecomment-2649419252). The read path doesn't support this and may skip over the image layer, resulting in a failure to reconstruct the page. 
## Summary of changes We already support nesting of image layers inside delta layers. The logic lives in `LayerMap::select_layer`. The main goal of this PR is to propagate the candidate in-memory layer down to that point and update the selection logic. Important changes are: 1. Support partial reads for the in-memory layer. Previously, we could only specify the start LSN of the read. We need to control the end LSN too. 2. `LayerMap::range_search` considers in-memory layers too. Previously, the search for in-memory layers was done explicitly in `Timeline::get_reconstruct_data_timeline`. Note that `LayerMap::range_search` now returns a weak readable layer which the `LayerManager` can upgrade. This indirection exists so that we can unit test the layer selection logic. 3. Update `LayerMap::select_layer` to consider the candidate in-memory layer too. Loosely related drive-bys: 1. Remove the "keys not found" tracking in the range search. This wasn't used anywhere and it just complicates things. 2. Remove the difficulty map stuff from the layer map. Again, not used anywhere.
Closes https://github.com/neondatabase/neon/issues/9185 Closes https://github.com/neondatabase/neon/issues/10720 --- pageserver/benches/bench_layer_map.rs | 76 -- pageserver/src/tenant.rs | 179 +++- pageserver/src/tenant/layer_map.rs | 809 +++++++++++++----- .../layer_map/historic_layer_coverage.rs | 6 + pageserver/src/tenant/storage_layer.rs | 9 +- .../tenant/storage_layer/inmemory_layer.rs | 4 +- .../src/tenant/storage_layer/layer/tests.rs | 1 + pageserver/src/tenant/timeline.rs | 143 +++- .../src/tenant/timeline/layer_manager.rs | 38 +- 9 files changed, 949 insertions(+), 316 deletions(-) diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index e11af49449..e1444778b8 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -7,7 +7,6 @@ use std::time::Instant; use criterion::measurement::WallTime; use criterion::{BenchmarkGroup, Criterion, black_box, criterion_group, criterion_main}; -use pageserver::keyspace::{KeyPartitioning, KeySpace}; use pageserver::tenant::layer_map::LayerMap; use pageserver::tenant::storage_layer::{LayerName, PersistentLayerDesc}; use pageserver_api::key::Key; @@ -72,41 +71,6 @@ fn uniform_query_pattern(layer_map: &LayerMap) -> Vec<(Key, Lsn)> { .collect() } -// Construct a partitioning for testing get_difficulty map when we -// don't have an exact result of `collect_keyspace` to work with. -fn uniform_key_partitioning(layer_map: &LayerMap, _lsn: Lsn) -> KeyPartitioning { - let mut parts = Vec::new(); - - // We add a partition boundary at the start of each image layer, - // no matter what lsn range it covers. This is just the easiest - // thing to do. A better thing to do would be to get a real - // partitioning from some database. Even better, remove the need - // for key partitions by deciding where to create image layers - // directly based on a coverage-based difficulty map. 
- let mut keys: Vec<_> = layer_map - .iter_historic_layers() - .filter_map(|l| { - if l.is_incremental() { - None - } else { - let kr = l.get_key_range(); - Some(kr.start.next()) - } - }) - .collect(); - keys.sort(); - - let mut current_key = Key::from_hex("000000000000000000000000000000000000").unwrap(); - for key in keys { - parts.push(KeySpace { - ranges: vec![current_key..key], - }); - current_key = key; - } - - KeyPartitioning { parts } -} - // Benchmark using metadata extracted from our performance test environment, from // a project where we have run pgbench many timmes. The pgbench database was initialized // between each test run. @@ -148,41 +112,6 @@ fn bench_from_real_project(c: &mut Criterion) { // Choose uniformly distributed queries let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map); - // Choose inputs for get_difficulty_map - let latest_lsn = layer_map - .iter_historic_layers() - .map(|l| l.get_lsn_range().end) - .max() - .unwrap(); - let partitioning = uniform_key_partitioning(&layer_map, latest_lsn); - - // Check correctness of get_difficulty_map - // TODO put this in a dedicated test outside of this mod - { - println!("running correctness check"); - - let now = Instant::now(); - let result_bruteforce = layer_map.get_difficulty_map_bruteforce(latest_lsn, &partitioning); - assert!(result_bruteforce.len() == partitioning.parts.len()); - println!("Finished bruteforce in {:?}", now.elapsed()); - - let now = Instant::now(); - let result_fast = layer_map.get_difficulty_map(latest_lsn, &partitioning, None); - assert!(result_fast.len() == partitioning.parts.len()); - println!("Finished fast in {:?}", now.elapsed()); - - // Assert results are equal. Manually iterate for easier debugging. 
- let zip = std::iter::zip( - &partitioning.parts, - std::iter::zip(result_bruteforce, result_fast), - ); - for (_part, (bruteforce, fast)) in zip { - assert_eq!(bruteforce, fast); - } - - println!("No issues found"); - } - // Define and name the benchmark function let mut group = c.benchmark_group("real_map"); group.bench_function("uniform_queries", |b| { @@ -192,11 +121,6 @@ fn bench_from_real_project(c: &mut Criterion) { } }); }); - group.bench_function("get_difficulty_map", |b| { - b.iter(|| { - layer_map.get_difficulty_map(latest_lsn, &partitioning, Some(3)); - }); - }); group.finish(); } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 11d656eb25..776e523c2e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2501,6 +2501,7 @@ impl Tenant { initdb_lsn: Lsn, pg_version: u32, ctx: &RequestContext, + in_memory_layer_desc: Vec, delta_layer_desc: Vec, image_layer_desc: Vec<(Lsn, Vec<(pageserver_api::key::Key, bytes::Bytes)>)>, end_lsn: Lsn, @@ -2522,6 +2523,11 @@ impl Tenant { .force_create_image_layer(lsn, images, Some(initdb_lsn), ctx) .await?; } + for in_memory in in_memory_layer_desc { + tline + .force_create_in_memory_layer(in_memory, Some(initdb_lsn), ctx) + .await?; + } let layer_names = tline .layers .read() @@ -5913,6 +5919,8 @@ mod tests { #[cfg(feature = "testing")] use timeline::GcInfo; #[cfg(feature = "testing")] + use timeline::InMemoryLayerTestDesc; + #[cfg(feature = "testing")] use timeline::compaction::{KeyHistoryRetention, KeyLogAtLsn}; use timeline::{CompactOptions, DeltaLayerTestDesc}; use utils::id::TenantId; @@ -7925,6 +7933,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers Vec::new(), // delta layers vec![(Lsn(0x20), vec![(base_key, test_img("data key 1"))])], // image layers Lsn(0x20), // it's fine to not advance LSN to 0x30 while using 0x30 to get below because `get_vectored_impl` does not wait for LSN @@ -8012,6 +8021,7 @@ mod tests { Lsn(0x10), 
DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers Vec::new(), // delta layers vec![( Lsn(0x20), @@ -8227,6 +8237,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers // delta layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range( @@ -8307,6 +8318,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers // delta layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range( @@ -8380,6 +8392,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers // delta layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range( @@ -8512,6 +8525,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1), DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2), @@ -8705,6 +8719,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![DeltaLayerTestDesc::new_with_inferred_key_range( Lsn(0x10)..Lsn(0x40), delta1, @@ -8761,6 +8776,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers Vec::new(), image_layers, end_lsn, @@ -8967,6 +8983,7 @@ mod tests { Lsn(0x08), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range( Lsn(0x08)..Lsn(0x10), @@ -8985,7 +9002,7 @@ mod tests { delta3, ), ], // delta layers - vec![], // image layers + vec![], // image layers Lsn(0x50), ) .await? 
@@ -8996,6 +9013,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range( Lsn(0x10)..Lsn(0x48), @@ -9546,6 +9564,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta1), DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x48), delta2), @@ -9793,6 +9812,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + Vec::new(), // in-memory layers vec![ // delta1 and delta 2 only contain a single key but multiple updates DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x10)..Lsn(0x30), delta1), @@ -10028,6 +10048,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + vec![], // in-memory layers vec![], // delta layers vec![(Lsn(0x18), img_layer)], // image layers Lsn(0x18), @@ -10274,6 +10295,7 @@ mod tests { baseline_image_layer_lsn, DEFAULT_PG_VERSION, &ctx, + vec![], // in-memory layers vec![DeltaLayerTestDesc::new_with_inferred_key_range( delta_layer_start_lsn..delta_layer_end_lsn, delta_layer_spec, @@ -10305,6 +10327,158 @@ mod tests { Ok(()) } + #[cfg(feature = "testing")] + #[tokio::test] + async fn test_vectored_read_with_image_layer_inside_inmem() -> anyhow::Result<()> { + let harness = + TenantHarness::create("test_vectored_read_with_image_layer_inside_inmem").await?; + let (tenant, ctx) = harness.load().await; + + let will_init_keys = [2, 6]; + fn get_key(id: u32) -> Key { + let mut key = Key::from_hex("110000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + let mut expected_key_values = HashMap::new(); + + let baseline_image_layer_lsn = Lsn(0x10); + let mut baseline_img_layer = Vec::new(); + for i in 0..5 { + let key = get_key(i); + let value = format!("value {i}@{baseline_image_layer_lsn}"); + + let removed = expected_key_values.insert(key, value.clone()); + assert!(removed.is_none()); + + baseline_img_layer.push((key, 
Bytes::from(value))); + } + + let nested_image_layer_lsn = Lsn(0x50); + let mut nested_img_layer = Vec::new(); + for i in 5..10 { + let key = get_key(i); + let value = format!("value {i}@{nested_image_layer_lsn}"); + + let removed = expected_key_values.insert(key, value.clone()); + assert!(removed.is_none()); + + nested_img_layer.push((key, Bytes::from(value))); + } + + let frozen_layer = { + let lsn_range = Lsn(0x40)..Lsn(0x60); + let mut data = Vec::new(); + for i in 0..10 { + let key = get_key(i); + let key_in_nested = nested_img_layer + .iter() + .any(|(key_with_img, _)| *key_with_img == key); + let lsn = { + if key_in_nested { + Lsn(nested_image_layer_lsn.0 + 5) + } else { + lsn_range.start + } + }; + + let will_init = will_init_keys.contains(&i); + if will_init { + data.push((key, lsn, Value::WalRecord(NeonWalRecord::wal_init("")))); + + expected_key_values.insert(key, "".to_string()); + } else { + let delta = format!("@{lsn}"); + data.push(( + key, + lsn, + Value::WalRecord(NeonWalRecord::wal_append(&delta)), + )); + + expected_key_values + .get_mut(&key) + .expect("An image exists for each key") + .push_str(delta.as_str()); + } + } + + InMemoryLayerTestDesc { + lsn_range, + is_open: false, + data, + } + }; + + let (open_layer, last_record_lsn) = { + let start_lsn = Lsn(0x70); + let mut data = Vec::new(); + let mut end_lsn = Lsn(0); + for i in 0..10 { + let key = get_key(i); + let lsn = Lsn(start_lsn.0 + i as u64); + let delta = format!("@{lsn}"); + data.push(( + key, + lsn, + Value::WalRecord(NeonWalRecord::wal_append(&delta)), + )); + + expected_key_values + .get_mut(&key) + .expect("An image exists for each key") + .push_str(delta.as_str()); + + end_lsn = std::cmp::max(end_lsn, lsn); + } + + ( + InMemoryLayerTestDesc { + lsn_range: start_lsn..Lsn::MAX, + is_open: true, + data, + }, + end_lsn, + ) + }; + + assert!( + nested_image_layer_lsn > frozen_layer.lsn_range.start + && nested_image_layer_lsn < frozen_layer.lsn_range.end + ); + + let tline = tenant + 
.create_test_timeline_with_layers( + TIMELINE_ID, + baseline_image_layer_lsn, + DEFAULT_PG_VERSION, + &ctx, + vec![open_layer, frozen_layer], // in-memory layers + Vec::new(), // delta layers + vec![ + (baseline_image_layer_lsn, baseline_img_layer), + (nested_image_layer_lsn, nested_img_layer), + ], // image layers + last_record_lsn, + ) + .await?; + + let keyspace = KeySpace::single(get_key(0)..get_key(10)); + let results = tline + .get_vectored(keyspace, last_record_lsn, IoConcurrency::sequential(), &ctx) + .await + .expect("No vectored errors"); + for (key, res) in results { + let value = res.expect("No key errors"); + let expected_value = expected_key_values.remove(&key).expect("No unknown keys"); + assert_eq!(value, Bytes::from(expected_value.clone())); + + tracing::info!("key={key} value={expected_value}"); + } + + Ok(()) + } + fn sort_layer_key(k1: &PersistentLayerKey, k2: &PersistentLayerKey) -> std::cmp::Ordering { ( k1.is_delta, @@ -10420,6 +10594,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + vec![], // in-memory layers vec![ DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1), DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2), @@ -10804,6 +10979,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + vec![], // in-memory layers vec![ // delta1/2/4 only contain a single key but multiple updates DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1), @@ -11055,6 +11231,7 @@ mod tests { Lsn(0x10), DEFAULT_PG_VERSION, &ctx, + vec![], // in-memory layers vec![ // delta1/2/4 only contain a single key but multiple updates DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x28), delta1), diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 59f5a6bd90..2b04e53f10 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -62,8 +62,7 @@ use utils::lsn::Lsn; use 
super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc}; use crate::context::RequestContext; -use crate::keyspace::KeyPartitioning; -use crate::tenant::storage_layer::InMemoryLayer; +use crate::tenant::storage_layer::{InMemoryLayer, ReadableLayerWeak}; /// /// LayerMap tracks what layers exist on a timeline. @@ -167,7 +166,7 @@ impl Drop for BatchedUpdates<'_> { /// Return value of LayerMap::search #[derive(Eq, PartialEq, Debug, Hash)] pub struct SearchResult { - pub layer: Arc, + pub layer: ReadableLayerWeak, pub lsn_floor: Lsn, } @@ -175,19 +174,37 @@ pub struct SearchResult { /// /// Contains a mapping from a layer description to a keyspace /// accumulator that contains all the keys which intersect the layer -/// from the original search space. Keys that were not found are accumulated -/// in a separate key space accumulator. +/// from the original search space. #[derive(Debug)] pub struct RangeSearchResult { pub found: HashMap, - pub not_found: KeySpaceAccum, } impl RangeSearchResult { fn new() -> Self { Self { found: HashMap::new(), - not_found: KeySpaceAccum::new(), + } + } + + fn map_to_in_memory_layer( + in_memory_layer: Option, + range: Range, + ) -> RangeSearchResult { + match in_memory_layer { + Some(inmem) => { + let search_result = SearchResult { + lsn_floor: inmem.get_lsn_range().start, + layer: ReadableLayerWeak::InMemoryLayer(inmem), + }; + + let mut accum = KeySpaceAccum::new(); + accum.add_range(range); + RangeSearchResult { + found: HashMap::from([(search_result, accum)]), + } + } + None => RangeSearchResult::new(), } } } @@ -199,6 +216,7 @@ struct RangeSearchCollector where Iter: Iterator>)>, { + in_memory_layer: Option, delta_coverage: Peekable, image_coverage: Peekable, key_range: Range, @@ -234,10 +252,12 @@ where fn new( key_range: Range, end_lsn: Lsn, + in_memory_layer: Option, delta_coverage: Iter, image_coverage: Iter, ) -> Self { Self { + in_memory_layer, delta_coverage: delta_coverage.peekable(), image_coverage: 
image_coverage.peekable(), key_range, @@ -266,8 +286,7 @@ where return self.result; } Some(layer_type) => { - // Changes for the range exist. Record anything before the first - // coverage change as not found. + // Changes for the range exist. let coverage_start = layer_type.next_change_at_key(); let range_before = self.key_range.start..coverage_start; self.pad_range(range_before); @@ -297,10 +316,22 @@ where self.result } - /// Mark a range as not found (i.e. no layers intersect it) + /// Map a range which does not intersect any persistent layers to + /// the in-memory layer candidate. fn pad_range(&mut self, key_range: Range) { if !key_range.is_empty() { - self.result.not_found.add_range(key_range); + if let Some(ref inmem) = self.in_memory_layer { + let search_result = SearchResult { + layer: ReadableLayerWeak::InMemoryLayer(inmem.clone()), + lsn_floor: inmem.get_lsn_range().start, + }; + + self.result + .found + .entry(search_result) + .or_default() + .add_range(key_range); + } } } @@ -310,6 +341,7 @@ where let selected = LayerMap::select_layer( self.current_delta.clone(), self.current_image.clone(), + self.in_memory_layer.clone(), self.end_lsn, ); @@ -365,6 +397,24 @@ where } } +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub struct InMemoryLayerDesc { + handle: InMemoryLayerHandle, + lsn_range: Range, +} + +impl InMemoryLayerDesc { + pub(crate) fn get_lsn_range(&self) -> Range { + self.lsn_range.clone() + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +enum InMemoryLayerHandle { + Open, + Frozen(usize), +} + impl LayerMap { /// /// Find the latest layer (by lsn.end) that covers the given @@ -394,69 +444,161 @@ impl LayerMap { /// layer result, or simplify the api to `get_latest_image` and /// `get_latest_delta`, and only call `get_latest_image` once. /// - /// NOTE: This only searches the 'historic' layers, *not* the - /// 'open' and 'frozen' layers! 
- /// pub fn search(&self, key: Key, end_lsn: Lsn) -> Option { - let version = self.historic.get().unwrap().get_version(end_lsn.0 - 1)?; + let in_memory_layer = self.search_in_memory_layer(end_lsn); + + let version = match self.historic.get().unwrap().get_version(end_lsn.0 - 1) { + Some(version) => version, + None => { + return in_memory_layer.map(|desc| SearchResult { + lsn_floor: desc.get_lsn_range().start, + layer: ReadableLayerWeak::InMemoryLayer(desc), + }); + } + }; + let latest_delta = version.delta_coverage.query(key.to_i128()); let latest_image = version.image_coverage.query(key.to_i128()); - Self::select_layer(latest_delta, latest_image, end_lsn) + Self::select_layer(latest_delta, latest_image, in_memory_layer, end_lsn) } + /// Select a layer from three potential candidates (in-memory, delta and image layer). + /// The candidates represent the first layer of each type which intersect a key range. + /// + /// Layer types have an in implicit priority (image > delta > in-memory). For instance, + /// if we have the option of reading an LSN range from both an image and a delta, we + /// should read from the image. 
fn select_layer( delta_layer: Option>, image_layer: Option>, + in_memory_layer: Option, end_lsn: Lsn, ) -> Option { assert!(delta_layer.as_ref().is_none_or(|l| l.is_delta())); assert!(image_layer.as_ref().is_none_or(|l| !l.is_delta())); - match (delta_layer, image_layer) { - (None, None) => None, - (None, Some(image)) => { + match (delta_layer, image_layer, in_memory_layer) { + (None, None, None) => None, + (None, Some(image), None) => { let lsn_floor = image.get_lsn_range().start; Some(SearchResult { - layer: image, + layer: ReadableLayerWeak::PersistentLayer(image), lsn_floor, }) } - (Some(delta), None) => { + (Some(delta), None, None) => { let lsn_floor = delta.get_lsn_range().start; Some(SearchResult { - layer: delta, + layer: ReadableLayerWeak::PersistentLayer(delta), lsn_floor, }) } - (Some(delta), Some(image)) => { + (Some(delta), Some(image), None) => { let img_lsn = image.get_lsn_range().start; let image_is_newer = image.get_lsn_range().end >= delta.get_lsn_range().end; let image_exact_match = img_lsn + 1 == end_lsn; if image_is_newer || image_exact_match { Some(SearchResult { - layer: image, + layer: ReadableLayerWeak::PersistentLayer(image), + lsn_floor: img_lsn, + }) + } else { + // If the delta overlaps with the image in the LSN dimension, do a partial + // up to the image layer. + let lsn_floor = + std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1); + Some(SearchResult { + layer: ReadableLayerWeak::PersistentLayer(delta), + lsn_floor, + }) + } + } + (None, None, Some(inmem)) => { + let lsn_floor = inmem.get_lsn_range().start; + Some(SearchResult { + layer: ReadableLayerWeak::InMemoryLayer(inmem), + lsn_floor, + }) + } + (None, Some(image), Some(inmem)) => { + // If the in-memory layer overlaps with the image in the LSN dimension, do a partial + // up to the image layer. 
+ let img_lsn = image.get_lsn_range().start; + let image_is_newer = image.get_lsn_range().end >= inmem.get_lsn_range().end; + let image_exact_match = img_lsn + 1 == end_lsn; + if image_is_newer || image_exact_match { + Some(SearchResult { + layer: ReadableLayerWeak::PersistentLayer(image), lsn_floor: img_lsn, }) } else { let lsn_floor = - std::cmp::max(delta.get_lsn_range().start, image.get_lsn_range().start + 1); + std::cmp::max(inmem.get_lsn_range().start, image.get_lsn_range().start + 1); Some(SearchResult { - layer: delta, + layer: ReadableLayerWeak::InMemoryLayer(inmem), lsn_floor, }) } } + (Some(delta), None, Some(inmem)) => { + // Overlaps between delta and in-memory layers are not a valid + // state, but we handle them here for completeness. + let delta_end = delta.get_lsn_range().end; + let delta_is_newer = delta_end >= inmem.get_lsn_range().end; + let delta_exact_match = delta_end == end_lsn; + if delta_is_newer || delta_exact_match { + Some(SearchResult { + lsn_floor: delta.get_lsn_range().start, + layer: ReadableLayerWeak::PersistentLayer(delta), + }) + } else { + // If the in-memory layer overlaps with the delta in the LSN dimension, do a partial + // up to the delta layer. + let lsn_floor = + std::cmp::max(inmem.get_lsn_range().start, delta.get_lsn_range().end); + Some(SearchResult { + layer: ReadableLayerWeak::InMemoryLayer(inmem), + lsn_floor, + }) + } + } + (Some(delta), Some(image), Some(inmem)) => { + // Determine the preferred persistent layer without taking the in-memory layer + // into consideration. + let persistent_res = + Self::select_layer(Some(delta.clone()), Some(image.clone()), None, end_lsn) + .unwrap(); + let persistent_l = match persistent_res.layer { + ReadableLayerWeak::PersistentLayer(l) => l, + ReadableLayerWeak::InMemoryLayer(_) => unreachable!(), + }; + + // Now handle the in-memory layer overlaps. 
+ let inmem_res = if persistent_l.is_delta() { + Self::select_layer(Some(persistent_l), None, Some(inmem.clone()), end_lsn) + .unwrap() + } else { + Self::select_layer(None, Some(persistent_l), Some(inmem.clone()), end_lsn) + .unwrap() + }; + + Some(SearchResult { + layer: inmem_res.layer, + // Use the more restrictive LSN floor + lsn_floor: std::cmp::max(persistent_res.lsn_floor, inmem_res.lsn_floor), + }) + } } } pub fn range_search(&self, key_range: Range, end_lsn: Lsn) -> RangeSearchResult { + let in_memory_layer = self.search_in_memory_layer(end_lsn); + let version = match self.historic.get().unwrap().get_version(end_lsn.0 - 1) { Some(version) => version, None => { - let mut result = RangeSearchResult::new(); - result.not_found.add_range(key_range); - return result; + return RangeSearchResult::map_to_in_memory_layer(in_memory_layer, key_range); } }; @@ -464,7 +606,13 @@ impl LayerMap { let delta_changes = version.delta_coverage.range_overlaps(&raw_range); let image_changes = version.image_coverage.range_overlaps(&raw_range); - let collector = RangeSearchCollector::new(key_range, end_lsn, delta_changes, image_changes); + let collector = RangeSearchCollector::new( + key_range, + end_lsn, + in_memory_layer, + delta_changes, + image_changes, + ); collector.collect() } @@ -571,17 +719,36 @@ impl LayerMap { } /// Get a ref counted pointer for the first in memory layer that matches the provided predicate. 
- pub fn find_in_memory_layer(&self, mut pred: Pred) -> Option> - where - Pred: FnMut(&Arc) -> bool, - { + pub(crate) fn search_in_memory_layer(&self, below: Lsn) -> Option { + let is_below = |l: &Arc| { + let start_lsn = l.get_lsn_range().start; + below > start_lsn + }; + if let Some(open) = &self.open_layer { - if pred(open) { - return Some(open.clone()); + if is_below(open) { + return Some(InMemoryLayerDesc { + handle: InMemoryLayerHandle::Open, + lsn_range: open.get_lsn_range(), + }); } } - self.frozen_layers.iter().rfind(|l| pred(l)).cloned() + self.frozen_layers + .iter() + .enumerate() + .rfind(|(_idx, l)| is_below(l)) + .map(|(idx, l)| InMemoryLayerDesc { + handle: InMemoryLayerHandle::Frozen(idx), + lsn_range: l.get_lsn_range(), + }) + } + + pub(crate) fn in_memory_layer(&self, desc: &InMemoryLayerDesc) -> Arc { + match desc.handle { + InMemoryLayerHandle::Open => self.open_layer.as_ref().unwrap().clone(), + InMemoryLayerHandle::Frozen(idx) => self.frozen_layers[idx].clone(), + } } /// @@ -737,136 +904,6 @@ impl LayerMap { max_stacked_deltas } - /// Count how many reimage-worthy layers we need to visit for given key-lsn pair. - /// - /// The `partition_range` argument is used as context for the reimage-worthiness decision. - /// - /// Used as a helper for correctness checks only. Performance not critical. - pub fn get_difficulty(&self, lsn: Lsn, key: Key, partition_range: &Range) -> usize { - match self.search(key, lsn) { - Some(search_result) => { - if search_result.layer.is_incremental() { - (Self::is_reimage_worthy(&search_result.layer, partition_range) as usize) - + self.get_difficulty(search_result.lsn_floor, key, partition_range) - } else { - 0 - } - } - None => 0, - } - } - - /// Used for correctness checking. Results are expected to be identical to - /// self.get_difficulty_map. Assumes self.search is correct. 
- pub fn get_difficulty_map_bruteforce( - &self, - lsn: Lsn, - partitioning: &KeyPartitioning, - ) -> Vec { - // Looking at the difficulty as a function of key, it could only increase - // when a delta layer starts or an image layer ends. Therefore it's sufficient - // to check the difficulties at: - // - the key.start for each non-empty part range - // - the key.start for each delta - // - the key.end for each image - let keys_iter: Box> = { - let mut keys: Vec = self - .iter_historic_layers() - .map(|layer| { - if layer.is_incremental() { - layer.get_key_range().start - } else { - layer.get_key_range().end - } - }) - .collect(); - keys.sort(); - Box::new(keys.into_iter()) - }; - let mut keys_iter = keys_iter.peekable(); - - // Iter the partition and keys together and query all the necessary - // keys, computing the max difficulty for each part. - partitioning - .parts - .iter() - .map(|part| { - let mut difficulty = 0; - // Partition ranges are assumed to be sorted and disjoint - // TODO assert it - for range in &part.ranges { - if !range.is_empty() { - difficulty = - std::cmp::max(difficulty, self.get_difficulty(lsn, range.start, range)); - } - while let Some(key) = keys_iter.peek() { - if key >= &range.end { - break; - } - let key = keys_iter.next().unwrap(); - if key < range.start { - continue; - } - difficulty = - std::cmp::max(difficulty, self.get_difficulty(lsn, key, range)); - } - } - difficulty - }) - .collect() - } - - /// For each part of a keyspace partitioning, return the maximum number of layers - /// that would be needed for page reconstruction in that part at the given LSN. - /// - /// If `limit` is provided we don't try to count above that number. - /// - /// This method is used to decide where to create new image layers. Computing the - /// result for the entire partitioning at once allows this function to be more - /// efficient, and further optimization is possible by using iterators instead, - /// to allow early return. 
- /// - /// TODO actually use this method instead of count_deltas. Currently we only use - /// it for benchmarks. - pub fn get_difficulty_map( - &self, - lsn: Lsn, - partitioning: &KeyPartitioning, - limit: Option, - ) -> Vec { - // TODO This is a naive implementation. Perf improvements to do: - // 1. Instead of calling self.image_coverage and self.count_deltas, - // iterate the image and delta coverage only once. - partitioning - .parts - .iter() - .map(|part| { - let mut difficulty = 0; - for range in &part.ranges { - if limit == Some(difficulty) { - break; - } - for (img_range, last_img) in self.image_coverage(range, lsn) { - if limit == Some(difficulty) { - break; - } - let img_lsn = if let Some(last_img) = last_img { - last_img.get_lsn_range().end - } else { - Lsn(0) - }; - - if img_lsn < lsn { - let num_deltas = self.count_deltas(&img_range, &(img_lsn..lsn), limit); - difficulty = std::cmp::max(difficulty, num_deltas); - } - } - } - difficulty - }) - .collect() - } - /// Return all L0 delta layers pub fn level0_deltas(&self) -> &Vec> { &self.l0_delta_layers @@ -1069,6 +1106,10 @@ mod tests { use std::collections::HashMap; use std::path::PathBuf; + use crate::{ + DEFAULT_PG_VERSION, + tenant::{harness::TenantHarness, storage_layer::LayerName}, + }; use pageserver_api::key::DBDIR_KEY; use pageserver_api::keyspace::{KeySpace, KeySpaceRandomAccum}; use utils::id::{TenantId, TimelineId}; @@ -1076,7 +1117,6 @@ mod tests { use super::*; use crate::tenant::IndexPart; - use crate::tenant::storage_layer::LayerName; #[derive(Clone)] struct LayerDesc { @@ -1101,7 +1141,6 @@ mod tests { } fn assert_range_search_result_eq(lhs: RangeSearchResult, rhs: RangeSearchResult) { - assert_eq!(lhs.not_found.to_keyspace(), rhs.not_found.to_keyspace()); let lhs: HashMap = lhs .found .into_iter() @@ -1127,17 +1166,12 @@ mod tests { let mut key = key_range.start; while key != key_range.end { let res = layer_map.search(key, end_lsn); - match res { - Some(res) => { - range_search_result - 
.found - .entry(res) - .or_default() - .add_key(key); - } - None => { - range_search_result.not_found.add_key(key); - } + if let Some(res) = res { + range_search_result + .found + .entry(res) + .or_default() + .add_key(key); } key = key.next(); @@ -1152,20 +1186,49 @@ mod tests { let range = Key::from_i128(100)..Key::from_i128(200); let res = layer_map.range_search(range.clone(), Lsn(100)); - assert_eq!( - res.not_found.to_keyspace(), - KeySpace { - ranges: vec![range] - } - ); + assert_range_search_result_eq(res, RangeSearchResult::new()); } - #[test] - fn ranged_search() { + #[tokio::test] + async fn ranged_search() { + let harness = TenantHarness::create("ranged_search").await.unwrap(); + let (tenant, ctx) = harness.load().await; + let timeline_id = TimelineId::generate(); + // Create the timeline such that the in-memory layers can be written + // to the timeline directory. + tenant + .create_test_timeline(timeline_id, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + + let gate = utils::sync::gate::Gate::default(); + let add_in_memory_layer = async |layer_map: &mut LayerMap, lsn_range: Range| { + let layer = InMemoryLayer::create( + harness.conf, + timeline_id, + harness.tenant_shard_id, + lsn_range.start, + &gate, + &ctx, + ) + .await + .unwrap(); + + layer.freeze(lsn_range.end).await; + + layer_map.frozen_layers.push_back(Arc::new(layer)); + }; + + let in_memory_layer_configurations = [ + vec![], + // Overlaps with the top-most image + vec![Lsn(35)..Lsn(50)], + ]; + let layers = vec![ LayerDesc { key_range: Key::from_i128(15)..Key::from_i128(50), - lsn_range: Lsn(0)..Lsn(5), + lsn_range: Lsn(5)..Lsn(6), is_delta: false, }, LayerDesc { @@ -1185,19 +1248,27 @@ mod tests { }, LayerDesc { key_range: Key::from_i128(35)..Key::from_i128(40), - lsn_range: Lsn(35)..Lsn(40), + lsn_range: Lsn(40)..Lsn(41), is_delta: false, }, ]; - let layer_map = create_layer_map(layers.clone()); - for start in 0..60 { - for end in (start + 1)..60 { - let range = 
Key::from_i128(start)..Key::from_i128(end); - let result = layer_map.range_search(range.clone(), Lsn(100)); - let expected = brute_force_range_search(&layer_map, range, Lsn(100)); + let mut layer_map = create_layer_map(layers.clone()); + for in_memory_layers in in_memory_layer_configurations { + for in_mem_layer_range in in_memory_layers { + add_in_memory_layer(&mut layer_map, in_mem_layer_range).await; + } - assert_range_search_result_eq(result, expected); + for start in 0..60 { + for end in (start + 1)..60 { + let range = Key::from_i128(start)..Key::from_i128(end); + let result = layer_map.range_search(range.clone(), Lsn(100)); + let expected = brute_force_range_search(&layer_map, range, Lsn(100)); + + eprintln!("{start}..{end}: {result:?}"); + + assert_range_search_result_eq(result, expected); + } } } } @@ -1490,12 +1561,348 @@ mod tests { // Sanity: the layer that holds latest data for the DBDIR key should always be visible // (just using this key as a key that will always exist for any layermap fixture) - let dbdir_layer = layer_map - .search(DBDIR_KEY, index.metadata.disk_consistent_lsn()) - .unwrap(); + let dbdir_layer = { + let readable_layer = layer_map + .search(DBDIR_KEY, index.metadata.disk_consistent_lsn()) + .unwrap(); + + match readable_layer.layer { + ReadableLayerWeak::PersistentLayer(desc) => desc, + ReadableLayerWeak::InMemoryLayer(_) => unreachable!(""), + } + }; assert!(matches!( - layer_visibilities.get(&dbdir_layer.layer).unwrap(), + layer_visibilities.get(&dbdir_layer).unwrap(), LayerVisibilityHint::Visible )); } } + +#[cfg(test)] +mod select_layer_tests { + use super::*; + + fn create_persistent_layer( + start_lsn: u64, + end_lsn: u64, + is_delta: bool, + ) -> Arc { + if !is_delta { + assert_eq!(end_lsn, start_lsn + 1); + } + + Arc::new(PersistentLayerDesc::new_test( + Key::MIN..Key::MAX, + Lsn(start_lsn)..Lsn(end_lsn), + is_delta, + )) + } + + fn create_inmem_layer(start_lsn: u64, end_lsn: u64) -> InMemoryLayerDesc { + InMemoryLayerDesc { 
+ handle: InMemoryLayerHandle::Open, + lsn_range: Lsn(start_lsn)..Lsn(end_lsn), + } + } + + #[test] + fn test_select_layer_empty() { + assert!(LayerMap::select_layer(None, None, None, Lsn(100)).is_none()); + } + + #[test] + fn test_select_layer_only_delta() { + let delta = create_persistent_layer(10, 20, true); + let result = LayerMap::select_layer(Some(delta.clone()), None, None, Lsn(100)).unwrap(); + + assert_eq!(result.lsn_floor, Lsn(10)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + } + + #[test] + fn test_select_layer_only_image() { + let image = create_persistent_layer(10, 11, false); + let result = LayerMap::select_layer(None, Some(image.clone()), None, Lsn(100)).unwrap(); + + assert_eq!(result.lsn_floor, Lsn(10)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } + + #[test] + fn test_select_layer_only_inmem() { + let inmem = create_inmem_layer(10, 20); + let result = LayerMap::select_layer(None, None, Some(inmem.clone()), Lsn(100)).unwrap(); + + assert_eq!(result.lsn_floor, Lsn(10)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + } + + #[test] + fn test_select_layer_image_inside_delta() { + let delta = create_persistent_layer(10, 20, true); + let image = create_persistent_layer(15, 16, false); + + let result = + LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(100)) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(16)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + None, + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(15)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } + + #[test] + fn test_select_layer_newer_image() { + let delta 
= create_persistent_layer(10, 20, true); + let image = create_persistent_layer(25, 26, false); + + let result = + LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(30)) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(25)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + + let result = + LayerMap::select_layer(Some(delta.clone()), None, None, result.lsn_floor).unwrap(); + + assert_eq!(result.lsn_floor, Lsn(10)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + } + + #[test] + fn test_select_layer_delta_with_older_image() { + let delta = create_persistent_layer(15, 25, true); + let image = create_persistent_layer(10, 11, false); + + let result = + LayerMap::select_layer(Some(delta.clone()), Some(image.clone()), None, Lsn(30)) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(15)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + + let result = + LayerMap::select_layer(None, Some(image.clone()), None, result.lsn_floor).unwrap(); + + assert_eq!(result.lsn_floor, Lsn(10)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } + + #[test] + fn test_select_layer_image_inside_inmem() { + let image = create_persistent_layer(15, 16, false); + let inmem = create_inmem_layer(10, 25); + + let result = + LayerMap::select_layer(None, Some(image.clone()), Some(inmem.clone()), Lsn(30)) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(16)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + + let result = LayerMap::select_layer( + None, + Some(image.clone()), + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(15)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + + let result = + 
LayerMap::select_layer(None, None, Some(inmem.clone()), result.lsn_floor).unwrap(); + assert_eq!(result.lsn_floor, Lsn(10)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + } + + #[test] + fn test_select_layer_delta_inside_inmem() { + let delta_top = create_persistent_layer(15, 20, true); + let delta_bottom = create_persistent_layer(10, 15, true); + let inmem = create_inmem_layer(15, 25); + + let result = + LayerMap::select_layer(Some(delta_top.clone()), None, Some(inmem.clone()), Lsn(30)) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(20)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + + let result = LayerMap::select_layer( + Some(delta_top.clone()), + None, + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(15)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta_top)) + ); + + let result = LayerMap::select_layer( + Some(delta_bottom.clone()), + None, + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + assert_eq!(result.lsn_floor, Lsn(10)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta_bottom)) + ); + } + + #[test] + fn test_select_layer_all_overlap_1() { + let inmem = create_inmem_layer(10, 30); + let delta = create_persistent_layer(15, 25, true); + let image = create_persistent_layer(20, 21, false); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + Lsn(50), + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(25)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(21)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) 
if Arc::ptr_eq(&l, &delta)) + ); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(20)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } + + #[test] + fn test_select_layer_all_overlap_2() { + let inmem = create_inmem_layer(20, 30); + let delta = create_persistent_layer(10, 40, true); + let image = create_persistent_layer(25, 26, false); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + Lsn(50), + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(26)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(25)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } + + #[test] + fn test_select_layer_all_overlap_3() { + let inmem = create_inmem_layer(30, 40); + let delta = create_persistent_layer(10, 30, true); + let image = create_persistent_layer(20, 21, false); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + Some(inmem.clone()), + Lsn(50), + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(30)); + assert!(matches!(result.layer, ReadableLayerWeak::InMemoryLayer(l) if l == inmem)); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + None, + result.lsn_floor, + ) + .unwrap(); + + assert_eq!(result.lsn_floor, Lsn(21)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &delta)) + ); + + let result = LayerMap::select_layer( + Some(delta.clone()), + Some(image.clone()), + None, + result.lsn_floor, + ) + 
.unwrap(); + + assert_eq!(result.lsn_floor, Lsn(20)); + assert!( + matches!(result.layer, ReadableLayerWeak::PersistentLayer(l) if Arc::ptr_eq(&l, &image)) + ); + } +} diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index f8bec48886..b3dc8e56a3 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -63,6 +63,8 @@ pub struct HistoricLayerCoverage { /// The latest state head: LayerCoverageTuple, + /// TODO: this could be an ordered vec using binary search. + /// We push into this map everytime we add a layer, so might see some benefit /// All previous states historic: BTreeMap>, } @@ -419,6 +421,10 @@ pub struct BufferedHistoricLayerCoverage { buffer: BTreeMap>, /// All current layers. This is not used for search. Only to make rebuilds easier. + // TODO: This map is never cleared. Rebuilds could use the post-trim last entry of + // [`Self::historic_coverage`] instead of doubling memory usage. 
+ // [`Self::len`]: can require rebuild and serve from latest historic + // [`Self::iter`]: already requires rebuild => can serve from latest historic layers: BTreeMap, } diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 7f313f46a2..ece163b24a 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -40,6 +40,7 @@ use utils::sync::gate::GateGuard; use self::inmemory_layer::InMemoryLayerFileId; use super::PageReconstructError; +use super::layer_map::InMemoryLayerDesc; use super::timeline::{GetVectoredError, ReadPath}; use crate::config::PageServerConf; use crate::context::{AccessStatsBehavior, RequestContext}; @@ -721,6 +722,12 @@ struct LayerToVisitId { lsn_floor: Lsn, } +#[derive(Debug, PartialEq, Eq, Hash)] +pub enum ReadableLayerWeak { + PersistentLayer(Arc), + InMemoryLayer(InMemoryLayerDesc), +} + /// Layer wrapper for the read path. Note that it is valid /// to use these layers even after external operations have /// been performed on them (compaction, freeze, etc.). 
@@ -873,7 +880,7 @@ impl ReadableLayer { } ReadableLayer::InMemoryLayer(layer) => { layer - .get_values_reconstruct_data(keyspace, lsn_range.end, reconstruct_state, ctx) + .get_values_reconstruct_data(keyspace, lsn_range, reconstruct_state, ctx) .await } } diff --git a/pageserver/src/tenant/storage_layer/inmemory_layer.rs b/pageserver/src/tenant/storage_layer/inmemory_layer.rs index ffdfe1dc27..46135b5330 100644 --- a/pageserver/src/tenant/storage_layer/inmemory_layer.rs +++ b/pageserver/src/tenant/storage_layer/inmemory_layer.rs @@ -416,7 +416,7 @@ impl InMemoryLayer { pub(crate) async fn get_values_reconstruct_data( self: &Arc, keyspace: KeySpace, - end_lsn: Lsn, + lsn_range: Range, reconstruct_state: &mut ValuesReconstructState, ctx: &RequestContext, ) -> Result<(), GetVectoredError> { @@ -433,8 +433,6 @@ impl InMemoryLayer { let mut reads: HashMap> = HashMap::new(); let mut ios: HashMap<(Key, Lsn), OnDiskValueIo> = Default::default(); - let lsn_range = self.start_lsn..end_lsn; - for range in keyspace.ranges.iter() { for (key, vec_map) in inner .index diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index d43dfefdbc..a7f3c6b8c5 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -49,6 +49,7 @@ async fn smoke_test() { Lsn(0x10), 14, &ctx, + Default::default(), // in-memory layers Default::default(), image_layers, Lsn(0x100), diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 851f84f603..17dbcee74e 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3914,39 +3914,22 @@ impl Timeline { let guard = timeline.layers.read().await; let layers = guard.layer_map()?; - let in_memory_layer = layers.find_in_memory_layer(|l| { - let start_lsn = l.get_lsn_range().start; - cont_lsn > start_lsn - }); + for range in unmapped_keyspace.ranges.iter() { + let results = 
layers.range_search(range.clone(), cont_lsn); - match in_memory_layer { - Some(l) => { - let lsn_range = l.get_lsn_range().start..cont_lsn; - fringe.update( - ReadableLayer::InMemoryLayer(l), - unmapped_keyspace.clone(), - lsn_range, - ); - } - None => { - for range in unmapped_keyspace.ranges.iter() { - let results = layers.range_search(range.clone(), cont_lsn); - - results - .found - .into_iter() - .map(|(SearchResult { layer, lsn_floor }, keyspace_accum)| { - ( - ReadableLayer::PersistentLayer(guard.get_from_desc(&layer)), - keyspace_accum.to_keyspace(), - lsn_floor..cont_lsn, - ) - }) - .for_each(|(layer, keyspace, lsn_range)| { - fringe.update(layer, keyspace, lsn_range) - }); - } - } + results + .found + .into_iter() + .map(|(SearchResult { layer, lsn_floor }, keyspace_accum)| { + ( + guard.upgrade(layer), + keyspace_accum.to_keyspace(), + lsn_floor..cont_lsn, + ) + }) + .for_each(|(layer, keyspace, lsn_range)| { + fringe.update(layer, keyspace, lsn_range) + }); } // It's safe to drop the layer map lock after planning the next round of reads. @@ -5555,6 +5538,14 @@ pub struct DeltaLayerTestDesc { pub data: Vec<(Key, Lsn, Value)>, } +#[cfg(test)] +#[derive(Clone)] +pub struct InMemoryLayerTestDesc { + pub lsn_range: Range, + pub data: Vec<(Key, Lsn, Value)>, + pub is_open: bool, +} + #[cfg(test)] impl DeltaLayerTestDesc { pub fn new(lsn_range: Range, key_range: Range, data: Vec<(Key, Lsn, Value)>) -> Self { @@ -6567,6 +6558,92 @@ impl Timeline { Ok(()) } + /// Force create an in-memory layer and place them into the layer map. 
+ #[cfg(test)] + pub(super) async fn force_create_in_memory_layer( + self: &Arc, + mut in_memory: InMemoryLayerTestDesc, + check_start_lsn: Option, + ctx: &RequestContext, + ) -> anyhow::Result<()> { + use utils::bin_ser::BeSer; + + // Validate LSNs + if let Some(check_start_lsn) = check_start_lsn { + assert!(in_memory.lsn_range.start >= check_start_lsn); + } + + let last_record_lsn = self.get_last_record_lsn(); + let layer_end_lsn = if in_memory.is_open { + in_memory + .data + .iter() + .map(|(_key, lsn, _value)| lsn) + .max() + .cloned() + } else { + Some(in_memory.lsn_range.end) + }; + + if let Some(end) = layer_end_lsn { + assert!( + end <= last_record_lsn, + "advance last record lsn before inserting a layer, end_lsn={}, last_record_lsn={}", + end, + last_record_lsn, + ); + } + + in_memory.data.iter().for_each(|(_key, lsn, _value)| { + assert!(*lsn >= in_memory.lsn_range.start); + assert!(*lsn < in_memory.lsn_range.end); + }); + + // Build the batch + in_memory + .data + .sort_unstable_by(|(ka, la, _), (kb, lb, _)| (ka, la).cmp(&(kb, lb))); + + let data = in_memory + .data + .into_iter() + .map(|(key, lsn, value)| { + let value_size = value.serialized_size().unwrap() as usize; + (key.to_compact(), lsn, value_size, value) + }) + .collect::>(); + + let batch = SerializedValueBatch::from_values(data); + + // Create the in-memory layer and write the batch into it + let layer = InMemoryLayer::create( + self.conf, + self.timeline_id, + self.tenant_shard_id, + in_memory.lsn_range.start, + &self.gate, + ctx, + ) + .await + .unwrap(); + + layer.put_batch(batch, ctx).await.unwrap(); + if !in_memory.is_open { + layer.freeze(in_memory.lsn_range.end).await; + } + + info!("force created in-memory layer {:?}", in_memory.lsn_range); + + // Link the layer to the layer map + { + let mut guard = self.layers.write().await; + let layer_map = guard.open_mut().unwrap(); + layer_map.force_insert_in_memory_layer(Arc::new(layer)); + } + + Ok(()) + } + /// Return all keys at the LSN in 
the image layers #[cfg(test)] pub(crate) async fn inspect_image_layers( @@ -6999,6 +7076,7 @@ mod tests { Lsn(0x10), 14, &ctx, + Vec::new(), // in-memory layers delta_layers, image_layers, Lsn(0x100), @@ -7132,6 +7210,7 @@ mod tests { Lsn(0x10), 14, &ctx, + Vec::new(), // in-memory layers delta_layers, image_layers, Lsn(0x100), diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index e552ea83de..1b489028dc 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -8,14 +8,14 @@ use tracing::trace; use utils::id::TimelineId; use utils::lsn::{AtomicLsn, Lsn}; -use super::TimelineWriterState; +use super::{ReadableLayer, TimelineWriterState}; use crate::config::PageServerConf; use crate::context::RequestContext; use crate::metrics::TimelineMetrics; use crate::tenant::layer_map::{BatchedUpdates, LayerMap}; use crate::tenant::storage_layer::{ AsLayerDesc, InMemoryLayer, Layer, LayerVisibilityHint, PersistentLayerDesc, - PersistentLayerKey, ResidentLayer, + PersistentLayerKey, ReadableLayerWeak, ResidentLayer, }; /// Provides semantic APIs to manipulate the layer map. @@ -37,6 +37,21 @@ impl Default for LayerManager { } impl LayerManager { + pub(crate) fn upgrade(&self, weak: ReadableLayerWeak) -> ReadableLayer { + match weak { + ReadableLayerWeak::PersistentLayer(desc) => { + ReadableLayer::PersistentLayer(self.get_from_desc(&desc)) + } + ReadableLayerWeak::InMemoryLayer(desc) => { + let inmem = self + .layer_map() + .expect("no concurrent shutdown") + .in_memory_layer(&desc); + ReadableLayer::InMemoryLayer(inmem) + } + } + } + pub(crate) fn get_from_key(&self, key: &PersistentLayerKey) -> Layer { // The assumption for the `expect()` is that all code maintains the following invariant: // A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor. 
@@ -470,6 +485,25 @@ impl OpenLayerManager { mapping.remove(layer); layer.delete_on_drop(); } + + #[cfg(test)] + pub(crate) fn force_insert_in_memory_layer(&mut self, layer: Arc) { + use pageserver_api::models::InMemoryLayerInfo; + + match layer.info() { + InMemoryLayerInfo::Open { .. } => { + assert!(self.layer_map.open_layer.is_none()); + self.layer_map.open_layer = Some(layer); + } + InMemoryLayerInfo::Frozen { lsn_start, .. } => { + if let Some(last) = self.layer_map.frozen_layers.back() { + assert!(last.get_lsn_range().end <= lsn_start); + } + + self.layer_map.frozen_layers.push_back(layer); + } + } + } } pub(crate) struct LayerFileManager(HashMap); From 9a4e2eab61844784b8323cddaae1ac3952b9f6f6 Mon Sep 17 00:00:00 2001 From: Alexander Lakhin Date: Mon, 3 Mar 2025 20:00:53 +0200 Subject: [PATCH 074/207] Fix artifact name for build with sanitizers (#11066) ## Problem When a build is made with sanitizers, this is not reflected in the artifact name, which can lead to overriding normal builds with sanitized ones. ## Summary of changes Take this property of a build into account when constructing the artifact name. 
--- .github/actions/run-python-test-set/action.yml | 8 +++++++- .github/workflows/_build-and-test-locally.yml | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/actions/run-python-test-set/action.yml b/.github/actions/run-python-test-set/action.yml index 122fe48b68..fa6f882161 100644 --- a/.github/actions/run-python-test-set/action.yml +++ b/.github/actions/run-python-test-set/action.yml @@ -44,6 +44,11 @@ inputs: description: 'Postgres version to use for tests' required: false default: 'v16' + sanitizers: + description: 'enabled or disabled' + required: false + default: 'disabled' + type: string benchmark_durations: description: 'benchmark durations JSON' required: false @@ -59,7 +64,7 @@ runs: if: inputs.build_type != 'remote' uses: ./.github/actions/download with: - name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}-artifact + name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build_type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' || '' }}-artifact path: /tmp/neon aws-oicd-role-arn: ${{ inputs.aws-oicd-role-arn }} @@ -112,6 +117,7 @@ runs: ALLOW_FORWARD_COMPATIBILITY_BREAKAGE: contains(github.event.pull_request.labels.*.name, 'forward compatibility breakage') RERUN_FAILED: ${{ inputs.rerun_failed }} PG_VERSION: ${{ inputs.pg_version }} + SANITIZERS: ${{ inputs.sanitizers }} shell: bash -euxo pipefail {0} run: | # PLATFORM will be embedded in the perf test report diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 30fde127b0..6a2070424a 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -280,7 +280,7 @@ jobs: - name: Upload Neon artifact uses: ./.github/actions/upload with: - name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}-artifact + name: neon-${{ runner.os }}-${{ runner.arch }}-${{ inputs.build-type }}${{ inputs.sanitizers == 'enabled' && '-sanitized' 
|| '' }}-artifact path: /tmp/neon aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} @@ -347,6 +347,7 @@ jobs: real_s3_region: eu-central-1 rerun_failed: true pg_version: ${{ matrix.pg_version }} + sanitizers: ${{ inputs.sanitizers }} aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} # `--session-timeout` is equal to (timeout-minutes - 10 minutes) * 60 seconds. # Attempt to stop tests gracefully to generate test reports @@ -359,7 +360,6 @@ jobs: PAGESERVER_VIRTUAL_FILE_IO_ENGINE: tokio-epoll-uring PAGESERVER_GET_VECTORED_CONCURRENT_IO: sidecar-task USE_LFC: ${{ matrix.lfc_state == 'with-lfc' && 'true' || 'false' }} - SANITIZERS: ${{ inputs.sanitizers }} # Temporary disable this step until we figure out why it's so flaky # Ref https://github.com/neondatabase/neon/issues/4540 From 5197e43396f65bab9f9bf54edf8e2b899b1c1b69 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 3 Mar 2025 19:04:01 +0000 Subject: [PATCH 075/207] pageserver: add recurse flag to layer download spec (#11068) I missed updating the open api spec in the original PR. We need this so that the cplane auto-generated client sees the flag. --- pageserver/src/http/openapi_spec.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 12252739fd..0fb9a240d5 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -842,6 +842,12 @@ paths: required: false schema: type: integer + - name: recurse + description: When set, will recurse with the downloads into ancestor timelines + in: query + required: false + schema: + type: boolean post: description: | Download all layers in the specified timeline's heatmap. 
The `tenant_shard_id` parameter From 6ca49b4d0c90009bc0c9b9934fe3d3835ade65ea Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 3 Mar 2025 19:16:03 +0000 Subject: [PATCH 076/207] safekeeper: fix a gap tracking edge case (#11054) The interpreted reader tracks a record aligned current position in the WAL stream. Partial reads move the stream internally, but not from the pov of the interpreted WAL reader. Hence, there are cases where new shards subscribe with a start position that matches the reader's current position, but we've also done some partial reads. This confuses the gap tracking. To make it more robust, update the current batch start to the min between the new start position and its current value. Since no record has been decoded yet (position matches), we can't have lost it. --- safekeeper/src/send_interpreted_wal.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index 2c1c73c25c..bf03f27d48 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -184,6 +184,16 @@ impl InterpretedWalReaderState { to: *current_position, } } else { + // Edge case: The new shard is at the same current position as + // the reader. Note that the current position is WAL record aligned, + // so the reader might have done some partial reads and updated the + // batch start. If that's the case, adjust the batch start to match + // starting position of the new shard. It can lead to some shards + // seeing overlaps, but in that case the actual record LSNs are checked + // which should be fine based on the filtering logic. + if let Some(start) = current_batch_wal_start { + *start = std::cmp::min(*start, new_shard_start_pos); + } CurrentPositionUpdate::NotReset(*current_position) } } From dbf9a8026162f01f95c9e218180fa0885b37410b Mon Sep 17 00:00:00 2001 From: "Alex Chi Z."
<4198311+skyzh@users.noreply.github.com> Date: Mon, 3 Mar 2025 15:23:20 -0500 Subject: [PATCH 077/207] fix(pageserver): avoid flooding gc-compaction logs (#11024) ## Problem The "did not trigger" gets logged at 10k/minute in staging. ## Summary of changes Change it to debug level. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index c835980a7d..76c28e11ab 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -321,7 +321,7 @@ impl GcCompactionQueue { l1_size, l2_size, l2_lsn, gc_cutoff ); } else { - info!( + debug!( "did not trigger auto gc-compaction: l1_size={}, l2_size={}, l2_lsn={}, gc_cutoff={}", l1_size, l2_size, l2_lsn, gc_cutoff ); From 6d0976dad5517531a2163ddd67e3d1e1b9cd9756 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 3 Mar 2025 16:05:43 -0500 Subject: [PATCH 078/207] feat(pageserver): persist reldir v2 migration status (#10980) ## Problem part of https://github.com/neondatabase/neon/issues/9516 ## Summary of changes Similar to the aux v2 migration, we persist the relv2 migration status into index_part, so that even if the config item is set to false, we will still read from the v2 storage to avoid loss of data. Note that only the two variants `None` and `Some(RelSizeMigration::Migrating)` are used for now. We don't have full migration implemented so it will never be set to `RelSizeMigration::Migrated`.
--------- Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/models.rs | 19 ++++++++ pageserver/src/http/routes.rs | 1 + pageserver/src/pgdatadir_mapping.rs | 47 +++++++++++++++++-- pageserver/src/tenant.rs | 6 ++- .../src/tenant/remote_timeline_client.rs | 19 +++++++- .../tenant/remote_timeline_client/index.rs | 16 +------ pageserver/src/tenant/timeline.rs | 28 ++++++++++- pageserver/src/tenant/timeline/delete.rs | 1 + .../performance/test_perf_many_relations.py | 7 +++ test_runner/regress/test_pg_regress.py | 15 ++++++ test_runner/regress/test_relations.py | 44 ++++++++++++++++- 11 files changed, 178 insertions(+), 25 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index ea565e7769..fabfe28aa2 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -1165,6 +1165,21 @@ pub struct OffloadedTimelineInfo { pub archived_at: chrono::DateTime, } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub enum RelSizeMigration { + /// The tenant is using the old rel_size format. + /// Note that this enum is persisted as `Option` in the index part, so + /// `None` is the same as `Some(RelSizeMigration::Legacy)`. + Legacy, + /// The tenant is migrating to the new rel_size format. Both old and new rel_size format are + /// persisted in the index part. The read path will read both formats and merge them. + Migrating, + /// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted + /// in the index part, and the read path will not read the old format. + Migrated, +} + /// This represents the output of the "timeline_detail" and "timeline_list" API calls. #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TimelineInfo { @@ -1243,7 +1258,11 @@ pub struct TimelineInfo { // Forward compatibility: a previous version of the pageserver will receive a JSON. 
serde::Deserialize does // not deny unknown fields by default so it's safe to set the field to some value, though it won't be // read. + /// Whether the timeline is archived. pub is_archived: Option, + + /// The status of the rel_size migration. + pub rel_size_migration: Option, } #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index a3ee31d6e6..cd79aa6680 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -481,6 +481,7 @@ async fn build_timeline_info_common( state, is_archived: Some(is_archived), + rel_size_migration: Some(timeline.get_rel_size_v2_status()), walreceiver_status, }; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index c10dfb4542..8aa96dd672 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -21,6 +21,7 @@ use pageserver_api::key::{ slru_segment_key_range, slru_segment_size_to_key, twophase_file_key, twophase_key_range, }; use pageserver_api::keyspace::SparseKeySpace; +use pageserver_api::models::RelSizeMigration; use pageserver_api::record::NeonWalRecord; use pageserver_api::reltag::{BlockNumber, RelTag, SlruKind}; use pageserver_api::shard::ShardIdentity; @@ -492,7 +493,9 @@ impl Timeline { // Otherwise, read the old reldir keyspace. // TODO: if IndexPart::rel_size_migration is `Migrated`, we only need to read from v2. - if self.get_rel_size_v2_enabled() { + if let RelSizeMigration::Migrated | RelSizeMigration::Migrating = + self.get_rel_size_v2_status() + { // fetch directory listing (new) let key = rel_tag_sparse_key(tag.spcnode, tag.dbnode, tag.relnode, tag.forknum); let buf = RelDirExists::decode_option(version.sparse_get(self, key, ctx).await?) 
@@ -544,7 +547,7 @@ impl Timeline { forknum: *forknum, })); - if !self.get_rel_size_v2_enabled() { + if let RelSizeMigration::Legacy = self.get_rel_size_v2_status() { return Ok(rels_v1); } @@ -1720,6 +1723,35 @@ impl DatadirModification<'_> { Ok(()) } + /// Returns `true` if the rel_size_v2 write path is enabled. If it is the first time that + /// we enable it, we also need to persist it in `index_part.json`. + pub fn maybe_enable_rel_size_v2(&mut self) -> anyhow::Result { + let status = self.tline.get_rel_size_v2_status(); + let config = self.tline.get_rel_size_v2_enabled(); + match (config, status) { + (false, RelSizeMigration::Legacy) => { + // tenant config didn't enable it and we didn't write any reldir_v2 key yet + Ok(false) + } + (false, RelSizeMigration::Migrating | RelSizeMigration::Migrated) => { + // index_part already persisted that the timeline has enabled rel_size_v2 + Ok(true) + } + (true, RelSizeMigration::Legacy) => { + // The first time we enable it, we need to persist it in `index_part.json` + self.tline + .update_rel_size_v2_status(RelSizeMigration::Migrating)?; + tracing::info!("enabled rel_size_v2"); + Ok(true) + } + (true, RelSizeMigration::Migrating | RelSizeMigration::Migrated) => { + // index_part already persisted that the timeline has enabled rel_size_v2 + // and we don't need to do anything + Ok(true) + } + } + } + /// Store a relmapper file (pg_filenode.map) in the repository pub async fn put_relmap_file( &mut self, @@ -1728,6 +1760,8 @@ impl DatadirModification<'_> { img: Bytes, ctx: &RequestContext, ) -> anyhow::Result<()> { + let v2_enabled = self.maybe_enable_rel_size_v2()?; + // Add it to the directory (if it doesn't exist already) let buf = self.get(DBDIR_KEY, ctx).await?; let mut dbdir = DbDirectory::des(&buf)?; @@ -1748,7 +1782,7 @@ impl DatadirModification<'_> { })?; self.pending_directory_entries .push((DirectoryKind::Rel, MetricsUpdate::Set(0))); - if self.tline.get_rel_size_v2_enabled() { + if v2_enabled { 
self.pending_directory_entries .push((DirectoryKind::RelV2, MetricsUpdate::Set(0))); } @@ -1905,7 +1939,9 @@ impl DatadirModification<'_> { return Err(RelationError::AlreadyExists); } - if self.tline.get_rel_size_v2_enabled() { + let v2_enabled = self.maybe_enable_rel_size_v2()?; + + if v2_enabled { let sparse_rel_dir_key = rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum); // check if the rel_dir_key exists in v2 @@ -2031,6 +2067,7 @@ impl DatadirModification<'_> { drop_relations: HashMap<(u32, u32), Vec>, ctx: &RequestContext, ) -> anyhow::Result<()> { + let v2_enabled = self.maybe_enable_rel_size_v2()?; for ((spc_node, db_node), rel_tags) in drop_relations { let dir_key = rel_dir_to_key(spc_node, db_node); let buf = self.get(dir_key, ctx).await?; @@ -2043,7 +2080,7 @@ impl DatadirModification<'_> { .push((DirectoryKind::Rel, MetricsUpdate::Sub(1))); dirty = true; true - } else if self.tline.get_rel_size_v2_enabled() { + } else if v2_enabled { // The rel is not found in the old reldir key, so we need to check the new sparse keyspace. // Note that a relation can only exist in one of the two keyspaces (guaranteed by the ingestion // logic). 
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 776e523c2e..fee007b2d7 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -31,8 +31,8 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use itertools::Itertools as _; use once_cell::sync::Lazy; -use pageserver_api::models; pub use pageserver_api::models::TenantState; +use pageserver_api::models::{self, RelSizeMigration}; use pageserver_api::models::{ CompactInfoResponse, LsnLease, TimelineArchivalState, TimelineState, TopTenantShardItem, WalRedoManagerStatus, @@ -1123,6 +1123,7 @@ impl Tenant { CreateTimelineCause::Load, idempotency.clone(), index_part.gc_compaction.clone(), + index_part.rel_size_migration.clone(), )?; let disk_consistent_lsn = timeline.get_disk_consistent_lsn(); anyhow::ensure!( @@ -4128,6 +4129,7 @@ impl Tenant { cause: CreateTimelineCause, create_idempotency: CreateTimelineIdempotency, gc_compaction_state: Option, + rel_size_v2_status: Option, ) -> anyhow::Result> { let state = match cause { CreateTimelineCause::Load => { @@ -4160,6 +4162,7 @@ impl Tenant { self.attach_wal_lag_cooldown.clone(), create_idempotency, gc_compaction_state, + rel_size_v2_status, self.cancel.child_token(), ); @@ -5231,6 +5234,7 @@ impl Tenant { CreateTimelineCause::Load, create_guard.idempotency.clone(), None, + None, ) .context("Failed to create timeline data structure")?; diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 4ba5844fea..2ca482ca43 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -194,7 +194,7 @@ pub(crate) use download::{ }; use index::GcCompactionState; pub(crate) use index::LayerFileMetadata; -use pageserver_api::models::TimelineArchivalState; +use pageserver_api::models::{RelSizeMigration, TimelineArchivalState}; use pageserver_api::shard::{ShardIndex, TenantShardId}; use regex::Regex; use remote_storage::{ @@ 
-900,7 +900,7 @@ impl RemoteTimelineClient { Ok(()) } - /// Launch an index-file upload operation in the background, setting `import_pgdata` field. + /// Launch an index-file upload operation in the background, setting `gc_compaction_state` field. pub(crate) fn schedule_index_upload_for_gc_compaction_state_update( self: &Arc, gc_compaction_state: GcCompactionState, @@ -912,6 +912,21 @@ impl RemoteTimelineClient { Ok(()) } + /// Launch an index-file upload operation in the background, setting `rel_size_v2_status` field. + pub(crate) fn schedule_index_upload_for_rel_size_v2_status_update( + self: &Arc, + rel_size_v2_status: RelSizeMigration, + ) -> anyhow::Result<()> { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = guard.initialized_mut()?; + upload_queue.dirty.rel_size_migration = Some(rel_size_v2_status); + // TODO: allow this operation to bypass the validation check because we might upload the index part + // with no layers but the flag updated. For now, we just modify the index part in memory and the next + // upload will include the flag. + // self.schedule_index_upload(upload_queue); + Ok(()) + } + /// /// Launch an index-file upload operation in the background, if necessary. 
/// diff --git a/pageserver/src/tenant/remote_timeline_client/index.rs b/pageserver/src/tenant/remote_timeline_client/index.rs index ceaed58bbd..16c38be907 100644 --- a/pageserver/src/tenant/remote_timeline_client/index.rs +++ b/pageserver/src/tenant/remote_timeline_client/index.rs @@ -7,6 +7,7 @@ use std::collections::HashMap; use chrono::NaiveDateTime; use pageserver_api::models::AuxFilePolicy; +use pageserver_api::models::RelSizeMigration; use pageserver_api::shard::ShardIndex; use serde::{Deserialize, Serialize}; use utils::id::TimelineId; @@ -117,21 +118,6 @@ pub struct GcCompactionState { pub(crate) last_completed_lsn: Lsn, } -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub enum RelSizeMigration { - /// The tenant is using the old rel_size format. - /// Note that this enum is persisted as `Option` in the index part, so - /// `None` is the same as `Some(RelSizeMigration::Legacy)`. - Legacy, - /// The tenant is migrating to the new rel_size format. Both old and new rel_size format are - /// persisted in the index part. The read path will read both formats and merge them. - Migrating, - /// The tenant has migrated to the new rel_size format. Only the new rel_size format is persisted - /// in the index part, and the read path will not read the old format. - Migrated, -} - impl IndexPart { /// When adding or modifying any parts of `IndexPart`, increment the version so that it can be /// used to understand later versions. 
diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 17dbcee74e..7ed7910732 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -46,7 +46,7 @@ use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPart use pageserver_api::models::{ CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, - InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, TimelineState, + InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, TimelineState, }; use pageserver_api::reltag::{BlockNumber, RelTag}; use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId}; @@ -436,6 +436,8 @@ pub struct Timeline { /// May host a background Tokio task which downloads all the layers from the current /// heatmap on demand. heatmap_layers_downloader: Mutex>, + + pub(crate) rel_size_v2_status: ArcSwapOption, } pub(crate) enum PreviousHeatmap { @@ -2368,6 +2370,9 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.compaction_threshold) } + /// Returns `true` if the rel_size_v2 config is enabled. NOTE: the write path and read path + /// should look at `get_rel_size_v2_status()` to get the actual status of the timeline. It is + /// possible that the index part persists the state while the config doesn't get persisted. 
pub(crate) fn get_rel_size_v2_enabled(&self) -> bool { let tenant_conf = self.tenant_conf.load(); tenant_conf @@ -2376,6 +2381,14 @@ impl Timeline { .unwrap_or(self.conf.default_tenant_conf.rel_size_v2_enabled) } + pub(crate) fn get_rel_size_v2_status(&self) -> RelSizeMigration { + self.rel_size_v2_status + .load() + .as_ref() + .map(|s| s.as_ref().clone()) + .unwrap_or(RelSizeMigration::Legacy) + } + fn get_compaction_upper_limit(&self) -> usize { let tenant_conf = self.tenant_conf.load(); tenant_conf @@ -2636,6 +2649,7 @@ impl Timeline { attach_wal_lag_cooldown: Arc>, create_idempotency: crate::tenant::CreateTimelineIdempotency, gc_compaction_state: Option, + rel_size_v2_status: Option, cancel: CancellationToken, ) -> Arc { let disk_consistent_lsn = metadata.disk_consistent_lsn(); @@ -2794,6 +2808,8 @@ impl Timeline { previous_heatmap: ArcSwapOption::from_pointee(previous_heatmap), heatmap_layers_downloader: Mutex::new(None), + + rel_size_v2_status: ArcSwapOption::from_pointee(rel_size_v2_status), }; result.repartition_threshold = @@ -2870,6 +2886,16 @@ impl Timeline { .schedule_index_upload_for_gc_compaction_state_update(gc_compaction_state) } + pub(crate) fn update_rel_size_v2_status( + &self, + rel_size_v2_status: RelSizeMigration, + ) -> anyhow::Result<()> { + self.rel_size_v2_status + .store(Some(Arc::new(rel_size_v2_status.clone()))); + self.remote_client + .schedule_index_upload_for_rel_size_v2_status_update(rel_size_v2_status) + } + pub(crate) fn get_gc_compaction_state(&self) -> Option { self.gc_compaction_state.load_full().as_ref().clone() } diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 7cdc69e55f..c9666bb4e1 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -306,6 +306,7 @@ impl DeleteTimelineFlow { CreateTimelineCause::Delete, crate::tenant::CreateTimelineIdempotency::FailWithConflict, // doesn't matter what we put here None, // doesn't 
matter what we put here + None, // doesn't matter what we put here ) .context("create_timeline_struct")?; diff --git a/test_runner/performance/test_perf_many_relations.py b/test_runner/performance/test_perf_many_relations.py index 2570c55f6c..e2f0a79018 100644 --- a/test_runner/performance/test_perf_many_relations.py +++ b/test_runner/performance/test_perf_many_relations.py @@ -83,6 +83,13 @@ def test_perf_simple_many_relations_reldir_v2( ], ) + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + != "legacy" + ) + n = 100000 step = 5000 # Create many relations diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index 6a76ad5ca8..df243c13f1 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -358,6 +358,21 @@ def test_tx_abort_with_many_relations( ], ) + if reldir_type == "v1": + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + == "legacy" + ) + else: + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + != "legacy" + ) + # How many relations: this number is tuned to be long enough to take tens of seconds # if the rollback code path is buggy, tripping the test's timeout. 
if reldir_type == "v1": diff --git a/test_runner/regress/test_relations.py b/test_runner/regress/test_relations.py index 3e29c92a96..07eacfc775 100644 --- a/test_runner/regress/test_relations.py +++ b/test_runner/regress/test_relations.py @@ -19,6 +19,17 @@ def test_pageserver_reldir_v2( endpoint.safe_psql("CREATE TABLE foo1 (id INTEGER PRIMARY KEY, val text)") endpoint.safe_psql("CREATE TABLE foo2 (id INTEGER PRIMARY KEY, val text)") + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + == "legacy" + ) + + # Ensure the pageserver accepts the table creation SQLs before the migration. In theory, we can also do + # a "wait_flush_lsn" here, but it's easier to just do a restart. + env.pageserver.restart() + # Switch to v2 env.pageserver.http_client().update_tenant_config( env.initial_tenant, @@ -27,6 +38,13 @@ def test_pageserver_reldir_v2( }, ) + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + == "legacy" + ) + # Check if both relations are still accessible endpoint.safe_psql("SELECT * FROM foo1") endpoint.safe_psql("SELECT * FROM foo2") @@ -41,12 +59,14 @@ def test_pageserver_reldir_v2( # Create a relation in v2 endpoint.safe_psql("CREATE TABLE foo3 (id INTEGER PRIMARY KEY, val text)") + endpoint.safe_psql("CREATE TABLE foo4 (id INTEGER PRIMARY KEY, val text)") # Delete a relation in v1 endpoint.safe_psql("DROP TABLE foo1") # Check if both relations are still accessible endpoint.safe_psql("SELECT * FROM foo2") endpoint.safe_psql("SELECT * FROM foo3") + endpoint.safe_psql("SELECT * FROM foo4") # Restart the endpoint endpoint.stop() @@ -57,7 +77,7 @@ def test_pageserver_reldir_v2( endpoint.safe_psql("DROP TABLE IF EXISTS foo1") endpoint.safe_psql("SELECT * FROM foo2") endpoint.safe_psql("SELECT * FROM foo3") - + endpoint.safe_psql("SELECT * FROM foo4") endpoint.safe_psql("DROP TABLE foo3") endpoint.stop() 
endpoint.start() @@ -66,3 +86,25 @@ def test_pageserver_reldir_v2( endpoint.safe_psql("DROP TABLE IF EXISTS foo1") endpoint.safe_psql("SELECT * FROM foo2") endpoint.safe_psql("DROP TABLE IF EXISTS foo3") + endpoint.safe_psql("SELECT * FROM foo4") + + # Set the config to false to emulate the case where the config is not persisted when the tenant gets detached/attached. + env.pageserver.http_client().update_tenant_config( + env.initial_tenant, + { + "rel_size_v2_enabled": False, + }, + ) + + # Check if the relation is still accessible + endpoint.safe_psql("SELECT * FROM foo2") + endpoint.safe_psql("SELECT * FROM foo4") + + env.pageserver.restart() + + assert ( + env.pageserver.http_client().timeline_detail(env.initial_tenant, env.initial_timeline)[ + "rel_size_migration" + ] + == "migrating" + ) From 65addfc5246ad9ef633874dddb3ab0d66c028fe3 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Mon, 3 Mar 2025 23:04:59 +0100 Subject: [PATCH 079/207] storcon: add per-tenant rate limiting for API requests (#10924) ## Problem Incoming requests often take the service lock, and sometimes even do database transactions. That creates a risk that a rogue client can starve the controller of the ability to do its primary job of reconciling tenants to an available state. ## Summary of changes * Use the `governor` crate to rate limit tenant requests at 10 requests per second. This is ~10-100x lower than the worst "attack" we've seen from a client bug. Admin APIs are not rate limited. * Add a `storage_controller_http_request_rate_limited` histogram for rate limited requests. * Log a warning every 10 seconds for rate limited tenants. The rate limiter is parametrized on TenantId, because the kinds of client bug we're protecting against generally happen within tenant scope, and the rates should be somewhat stable: we expect the global rate of requests to increase as we do more work, but we do not expect the rate of requests to one tenant to increase. 
--------- Co-authored-by: John Spray --- Cargo.lock | 77 ++++++++++++++++++- Cargo.toml | 1 + storage_controller/Cargo.toml | 1 + storage_controller/src/http.rs | 69 +++++++++++++++-- storage_controller/src/main.rs | 6 ++ storage_controller/src/metrics.rs | 4 + storage_controller/src/service.rs | 5 ++ .../fixtures/pageserver/allowed_errors.py | 2 + 8 files changed, 156 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 293ed465ff..a978e4d744 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2410,9 +2410,9 @@ checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" [[package]] name = "futures-timer" -version = "3.0.2" +version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" [[package]] name = "futures-util" @@ -2515,6 +2515,27 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "governor" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "842dc78579ce01e6a1576ad896edc92fca002dd60c9c3746b7fc2bec6fb429d0" +dependencies = [ + "cfg-if", + "dashmap 6.1.0", + "futures-sink", + "futures-timer", + "futures-util", + "no-std-compat", + "nonzero_ext", + "parking_lot 0.12.1", + "portable-atomic", + "quanta", + "rand 0.8.5", + "smallvec", + "spinning_top", +] + [[package]] name = "group" version = "0.12.1" @@ -3725,6 +3746,12 @@ dependencies = [ "memoffset 0.9.0", ] +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" + [[package]] name = "nom" version = "7.1.3" @@ -3735,6 +3762,12 @@ dependencies = [ "minimal-lexical", ] +[[package]] 
+name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + [[package]] name = "notify" version = "8.0.0" @@ -4591,6 +4624,12 @@ dependencies = [ "never-say-never", ] +[[package]] +name = "portable-atomic" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" + [[package]] name = "postgres" version = "0.19.7" @@ -5052,6 +5091,21 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "quanta" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi 0.11.0+wasi-snapshot-preview1", + "web-sys", + "winapi", +] + [[package]] name = "quick-xml" version = "0.26.0" @@ -5182,6 +5236,15 @@ dependencies = [ "num-traits", ] +[[package]] +name = "raw-cpuid" +version = "11.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6928fa44c097620b706542d428957635951bade7143269085389d42c8a4927e" +dependencies = [ + "bitflags 2.8.0", +] + [[package]] name = "rayon" version = "1.7.0" @@ -6395,6 +6458,15 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spinning_top" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" +dependencies = [ + "lock_api", +] + [[package]] name = "spki" version = "0.6.0" @@ -6471,6 +6543,7 @@ dependencies = [ "diesel_migrations", "fail", "futures", + "governor", "hex", "http-utils", "humantime", diff --git a/Cargo.toml b/Cargo.toml index ff45d46a47..870b3412db 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ futures = "0.3" futures-core = "0.3" futures-util = "0.3" git-version = "0.3" +governor = "0.8" hashbrown = "0.14" hashlink = "0.9.1" hdrhistogram = "7.5.2" diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index b63ba154da..6b657b5ea0 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -21,6 +21,7 @@ clap.workspace = true cron.workspace = true fail.workspace = true futures.workspace = true +governor.workspace = true hex.workspace = true hyper0.workspace = true humantime.workspace = true diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 64f0be3c23..3e448d7013 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -1,5 +1,5 @@ use std::str::FromStr; -use std::sync::Arc; +use std::sync::{Arc, LazyLock}; use std::time::{Duration, Instant}; use anyhow::Context; @@ -33,6 +33,7 @@ use pageserver_api::upcall_api::{ReAttachRequest, ValidateRequest}; use pageserver_client::{BlockUnblock, mgmt_api}; use routerify::Middleware; use tokio_util::sync::CancellationToken; +use tracing::warn; use utils::auth::{Scope, SwappableJwtAuth}; use utils::id::{NodeId, TenantId, TimelineId}; @@ -49,6 +50,7 @@ use crate::service::{LeadershipStatus, RECONCILE_TIMEOUT, STARTUP_RECONCILE_TIME pub struct HttpState { service: Arc, auth: Option>, + rate_limiter: governor::DefaultKeyedRateLimiter, neon_metrics: NeonMetrics, allowlist_routes: &'static [&'static str], } @@ -59,9 +61,11 @@ impl HttpState { auth: Option>, build_info: BuildInfo, ) -> Self { + let quota = governor::Quota::per_second(service.get_config().tenant_rate_limit); Self { service, auth, + rate_limiter: governor::RateLimiter::keyed(quota), neon_metrics: NeonMetrics::new(build_info), allowlist_routes: &[ "/status", @@ -82,6 +86,40 @@ fn get_state(request: &Request) -> &HttpState { .as_ref() } +/// Rate limits tenant requests. 
+/// +/// TODO: this should be a request middleware, but requires us to extract the tenant ID from +/// different URLs in a systematic way. +/// +/// TODO: consider returning a 429 response if these start piling up. +async fn maybe_rate_limit(request: &Request, tenant_id: TenantId) { + // Check if the tenant should be rate-limited. + let rate_limiter = &get_state(request).rate_limiter; + if rate_limiter.check_key(&tenant_id).is_ok() { + return; + } + + // Measure the rate limiting delay. + let _timer = METRICS_REGISTRY + .metrics_group + .storage_controller_http_request_rate_limited + .start_timer(); + + // Log rate limited tenants once every 10 seconds. + static LOG_RATE_LIMITER: LazyLock> = + LazyLock::new(|| { + let quota = governor::Quota::with_period(Duration::from_secs(10)).unwrap(); + governor::RateLimiter::keyed(quota) + }); + + if LOG_RATE_LIMITER.check_key(&tenant_id).is_ok() { + warn!("tenant {tenant_id} is rate limited") + } + + // Wait for quota. + rate_limiter.until_key_ready(&tenant_id).await; +} + /// Pageserver calls into this on startup, to learn which tenants it should attach async fn handle_re_attach(req: Request) -> Result, ApiError> { check_permissions(&req, Scope::GenerationsApi)?; @@ -247,6 +285,7 @@ async fn handle_tenant_config_get( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -264,6 +303,7 @@ async fn handle_tenant_time_travel_remote_storage( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -311,6 +351,7 @@ async fn handle_tenant_secondary_download( ) -> Result, ApiError> { let tenant_id: TenantId = 
parse_request_param(&req, "tenant_id")?; let wait = parse_query_param(&req, "wait_ms")?.map(Duration::from_millis); + maybe_rate_limit(&req, tenant_id).await; match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -329,6 +370,7 @@ async fn handle_tenant_delete( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -356,6 +398,7 @@ async fn handle_tenant_timeline_create( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -381,6 +424,7 @@ async fn handle_tenant_timeline_delete( let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -457,6 +501,7 @@ async fn handle_tenant_timeline_archival_config( let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -482,6 +527,7 @@ async fn handle_tenant_timeline_detach_ancestor( let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -504,6 +550,7 @@ async fn handle_tenant_timeline_block_unblock_gc( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + 
maybe_rate_limit(&req, tenant_id).await; let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; @@ -521,6 +568,7 @@ async fn handle_tenant_timeline_download_heatmap_layers( let tenant_shard_id: TenantShardId = parse_request_param(&req, "tenant_shard_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_shard_id.tenant_id).await; let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; let concurrency: Option = parse_query_param(&req, "concurrency")?; @@ -550,6 +598,7 @@ async fn handle_tenant_timeline_passthrough( ) -> Result, ApiError> { let tenant_or_shard_id: TenantShardId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_or_shard_id.tenant_id).await; let req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -654,6 +703,7 @@ async fn handle_tenant_locate( let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -669,9 +719,9 @@ async fn handle_tenant_describe( service: Arc, req: Request, ) -> Result, ApiError> { - check_permissions(&req, Scope::Scrubber)?; - let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; + check_permissions(&req, Scope::Scrubber)?; + // NB: don't rate limit: scrubber operation. match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1006,6 +1056,7 @@ async fn handle_tenant_shard_split( req: Request, ) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1028,6 +1079,7 @@ async fn handle_tenant_shard_migrate( req: Request, ) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. 
let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1051,6 +1103,7 @@ async fn handle_tenant_shard_migrate_secondary( req: Request, ) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1074,6 +1127,7 @@ async fn handle_tenant_shard_cancel_reconcile( req: Request, ) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. let req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1093,6 +1147,7 @@ async fn handle_tenant_shard_cancel_reconcile( async fn handle_tenant_update_policy(req: Request) -> Result, ApiError> { check_permissions(&req, Scope::Admin)?; + // NB: don't rate limit: admin operation. let mut req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1148,9 +1203,9 @@ async fn handle_step_down(req: Request) -> Result, ApiError } async fn handle_tenant_drop(req: Request) -> Result, ApiError> { - check_permissions(&req, Scope::PageServerApi)?; - let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; + check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; let req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { @@ -1165,9 +1220,9 @@ async fn handle_tenant_drop(req: Request) -> Result, ApiErr } async fn handle_tenant_import(req: Request) -> Result, ApiError> { - check_permissions(&req, Scope::PageServerApi)?; - let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; + check_permissions(&req, Scope::PageServerApi)?; + maybe_rate_limit(&req, tenant_id).await; let req = match maybe_forward(req).await { ForwardOutcome::Forwarded(res) => { diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 380ffeb9b7..6ef17c0007 100644 --- a/storage_controller/src/main.rs +++ 
b/storage_controller/src/main.rs @@ -1,3 +1,4 @@ +use std::num::NonZeroU32; use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; @@ -98,6 +99,10 @@ struct Cli { #[arg(long)] priority_reconciler_concurrency: Option, + /// Tenant API rate limit, as requests per second per tenant. + #[arg(long, default_value = "10")] + tenant_rate_limit: NonZeroU32, + /// How long to wait for the initial database connection to be available. #[arg(long, default_value = "5s")] db_connect_timeout: humantime::Duration, @@ -339,6 +344,7 @@ async fn async_main() -> anyhow::Result<()> { priority_reconciler_concurrency: args .priority_reconciler_concurrency .unwrap_or(PRIORITY_RECONCILER_CONCURRENCY_DEFAULT), + tenant_rate_limit: args.tenant_rate_limit, split_threshold: args.split_threshold, neon_local_repo_dir: args.neon_local_repo_dir, max_secondary_lag_bytes: args.max_secondary_lag_bytes, diff --git a/storage_controller/src/metrics.rs b/storage_controller/src/metrics.rs index f490edb68f..ea390df726 100644 --- a/storage_controller/src/metrics.rs +++ b/storage_controller/src/metrics.rs @@ -76,6 +76,10 @@ pub(crate) struct StorageControllerMetricGroup { pub(crate) storage_controller_http_request_latency: measured::HistogramVec, + /// HTTP rate limiting latency across all tenants and endpoints + #[metric(metadata = histogram::Thresholds::exponential_buckets(0.1, 10.0))] + pub(crate) storage_controller_http_request_rate_limited: measured::Histogram<10>, + /// Count of HTTP requests to the pageserver that resulted in an error, /// broken down by the pageserver node id, request name and method pub(crate) storage_controller_pageserver_request_error: diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 26ccfd5445..8fc7f7a0c5 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -5,6 +5,7 @@ use std::borrow::Cow; use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; use std::error::Error; 
+use std::num::NonZeroU32; use std::ops::Deref; use std::path::PathBuf; use std::str::FromStr; @@ -365,6 +366,10 @@ pub struct Config { /// How many high-priority Reconcilers may be spawned concurrently pub priority_reconciler_concurrency: usize, + /// How many API requests per second to allow per tenant, across all + /// tenant-scoped API endpoints. Further API requests queue until ready. + pub tenant_rate_limit: NonZeroU32, + /// How large must a shard grow in bytes before we split it? /// None disables auto-splitting. pub split_threshold: Option, diff --git a/test_runner/fixtures/pageserver/allowed_errors.py b/test_runner/fixtures/pageserver/allowed_errors.py index 4fce558840..abddfa2768 100755 --- a/test_runner/fixtures/pageserver/allowed_errors.py +++ b/test_runner/fixtures/pageserver/allowed_errors.py @@ -124,6 +124,8 @@ DEFAULT_STORAGE_CONTROLLER_ALLOWED_ERRORS = [ # controller's attempts to notify the endpoint). ".*reconciler.*neon_local notification hook failed.*", ".*reconciler.*neon_local error.*", + # Tenant rate limits may fire in tests that submit lots of API requests. + ".*tenant \\S+ is rate limited.*", ] From 435bf452e6ec4b9a5e10388911ccd80140cb3311 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Tue, 4 Mar 2025 08:18:19 +0000 Subject: [PATCH 080/207] tests: remove obsolete err log whitelisting (#11069) The pageserver read path now supports overlapped in-memory and image layers via https://github.com/neondatabase/neon/pull/11000. These allowed errors are now obsolete. 
--- test_runner/regress/test_sharding.py | 11 ----------- test_runner/regress/test_storage_scrubber.py | 11 ----------- 2 files changed, 22 deletions(-) diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index f58bbcd3c0..cb28f5b12d 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -1814,14 +1814,3 @@ def test_sharding_gc( shard_gc_cutoff_lsn = Lsn(shard_index["metadata_bytes"]["latest_gc_cutoff_lsn"]) log.info(f"Shard {shard_number} cutoff LSN: {shard_gc_cutoff_lsn}") assert shard_gc_cutoff_lsn == shard_0_gc_cutoff_lsn - - for ps in env.pageservers: - # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by - # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. - # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed - ps.allowed_errors.extend( - [ - ".*could not find data for key.*", - ".*could not ingest record.*", - ] - ) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index d44c176b35..0f4e5688a9 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -312,17 +312,6 @@ def test_scrubber_physical_gc_ancestors(neon_env_builder: NeonEnvBuilder, shard_ drop_local_state(env, tenant_id) workload.validate() - for ps in env.pageservers: - # This is not okay, but it's not a scrubber bug: it's a pageserver issue that is exposed by - # the specific pattern of aggressive checkpointing+image layer generation + GC that this test does. 
- # TODO: remove when https://github.com/neondatabase/neon/issues/10720 is fixed - ps.allowed_errors.extend( - [ - ".*could not find data for key.*", - ".*could not ingest record.*", - ] - ) - def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder): """ From a2902e774aaebbb3e424ad23be30a86e413ab431 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 4 Mar 2025 13:13:41 +0100 Subject: [PATCH 081/207] http-utils: generate heap profiles with jemalloc_pprof (#11075) ## Problem The code to generate symbolized pprof heap profiles and flamegraph SVGs has been upstreamed to the `jemalloc_pprof` crate: * https://github.com/polarsignals/rust-jemalloc-pprof/pull/22 * https://github.com/polarsignals/rust-jemalloc-pprof/pull/23 ## Summary of changes Use `jemalloc_pprof` to generate symbolized pprof heap profiles and flamegraph SVGs. This reintroduces a bunch of internal jemalloc stack frames that we'd previously strip, e.g. each stack now always ends with `prof_backtrace_impl` (where jemalloc takes a stack trace for heap profiling), but that seems ok. 
--- Cargo.lock | 18 ++- Cargo.toml | 4 +- libs/http-utils/Cargo.toml | 3 - libs/http-utils/src/endpoint.rs | 58 ++------ libs/http-utils/src/lib.rs | 1 - libs/http-utils/src/pprof.rs | 238 -------------------------------- libs/utils/Cargo.toml | 1 - 7 files changed, 21 insertions(+), 302 deletions(-) delete mode 100644 libs/http-utils/src/pprof.rs diff --git a/Cargo.lock b/Cargo.lock index a978e4d744..030753bca5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2798,12 +2798,9 @@ name = "http-utils" version = "0.1.0" dependencies = [ "anyhow", - "backtrace", "bytes", "fail", - "flate2", "hyper 0.14.30", - "inferno 0.12.0", "itertools 0.10.5", "jemalloc_pprof", "metrics", @@ -3302,9 +3299,9 @@ checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "jemalloc_pprof" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a883828bd6a4b957cd9f618886ff19e5f3ebd34e06ba0e855849e049fef32fb" +checksum = "5622af6d21ff86ed7797ef98e11b8f302da25ec69a7db9f6cde8e2e1c8df9992" dependencies = [ "anyhow", "libc", @@ -3503,9 +3500,9 @@ dependencies = [ [[package]] name = "mappings" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce9229c438fbf1c333926e2053c4c091feabbd40a1b590ec62710fea2384af9e" +checksum = "e434981a332777c2b3062652d16a55f8e74fa78e6b1882633f0d77399c84fc2a" dependencies = [ "anyhow", "libc", @@ -4794,12 +4791,14 @@ dependencies = [ [[package]] name = "pprof_util" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65c568b3f8c1c37886ae07459b1946249e725c315306b03be5632f84c239f781" +checksum = "9fa015c78eed2130951e22c58d2095849391e73817ab2e74f71b0b9f63dd8416" dependencies = [ "anyhow", + "backtrace", "flate2", + "inferno 0.12.0", "num", "paste", "prost", @@ -7715,7 +7714,6 @@ dependencies = [ "anyhow", "arc-swap", "async-compression", - "backtrace", 
"bincode", "byteorder", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 870b3412db..2303723e43 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -53,7 +53,6 @@ anyhow = { version = "1.0", features = ["backtrace"] } arc-swap = "1.6" async-compression = { version = "0.4.0", features = ["tokio", "gzip", "zstd"] } atomic-take = "1.1.0" -backtrace = "0.3.74" flate2 = "1.0.26" assert-json-diff = "2" async-stream = "0.3" @@ -114,11 +113,10 @@ hyper-util = "0.1" tokio-tungstenite = "0.21.0" indexmap = "2" indoc = "2" -inferno = "0.12.0" ipnet = "2.10.0" itertools = "0.10" itoa = "1.0.11" -jemalloc_pprof = "0.6" +jemalloc_pprof = { version = "0.7", features = ["symbolize", "flamegraph"] } jsonwebtoken = "9" lasso = "0.7" libc = "0.2" diff --git a/libs/http-utils/Cargo.toml b/libs/http-utils/Cargo.toml index d72e4bd012..d16dac7876 100644 --- a/libs/http-utils/Cargo.toml +++ b/libs/http-utils/Cargo.toml @@ -6,11 +6,8 @@ license.workspace = true [dependencies] anyhow.workspace = true -backtrace.workspace = true bytes.workspace = true -inferno.workspace = true fail.workspace = true -flate2.workspace = true hyper0.workspace = true itertools.workspace = true jemalloc_pprof.workspace = true diff --git a/libs/http-utils/src/endpoint.rs b/libs/http-utils/src/endpoint.rs index 6128113580..f4f93df62f 100644 --- a/libs/http-utils/src/endpoint.rs +++ b/libs/http-utils/src/endpoint.rs @@ -3,8 +3,6 @@ use std::io::Write as _; use std::str::FromStr; use std::time::Duration; -use ::pprof::ProfilerGuardBuilder; -use ::pprof::protos::Message as _; use anyhow::{Context, anyhow}; use bytes::{Bytes, BytesMut}; use hyper::header::{AUTHORIZATION, CONTENT_DISPOSITION, CONTENT_TYPE, HeaderName}; @@ -12,7 +10,8 @@ use hyper::http::HeaderValue; use hyper::{Body, Method, Request, Response}; use metrics::{Encoder, IntCounter, TextEncoder, register_int_counter}; use once_cell::sync::Lazy; -use regex::Regex; +use pprof::ProfilerGuardBuilder; +use pprof::protos::Message as _; use routerify::ext::RequestExt; 
use routerify::{Middleware, RequestInfo, Router, RouterBuilder}; use tokio::sync::{Mutex, Notify, mpsc}; @@ -22,7 +21,6 @@ use tracing::{Instrument, debug, info, info_span, warn}; use utils::auth::{AuthError, Claims, SwappableJwtAuth}; use crate::error::{ApiError, api_error_handler, route_error_handler}; -use crate::pprof; use crate::request::{get_query_param, parse_query_param}; static SERVE_METRICS_COUNT: Lazy = Lazy::new(|| { @@ -449,20 +447,6 @@ pub async fn profile_heap_handler(req: Request) -> Result, Some(format) => return Err(ApiError::BadRequest(anyhow!("invalid format {format}"))), }; - // Functions and mappings to strip when symbolizing pprof profiles. If true, - // also remove child frames. - static STRIP_FUNCTIONS: Lazy> = Lazy::new(|| { - vec![ - (Regex::new("^__rust").unwrap(), false), - (Regex::new("^_start$").unwrap(), false), - (Regex::new("^irallocx_prof").unwrap(), true), - (Regex::new("^prof_alloc_prep").unwrap(), true), - (Regex::new("^std::rt::lang_start").unwrap(), false), - (Regex::new("^std::sys::backtrace::__rust").unwrap(), false), - ] - }); - const STRIP_MAPPINGS: &[&str] = &["libc", "libgcc", "pthread", "vdso"]; - // Obtain profiler handle. let mut prof_ctl = jemalloc_pprof::PROF_CTL .as_ref() @@ -495,45 +479,27 @@ pub async fn profile_heap_handler(req: Request) -> Result, } Format::Pprof => { - let data = tokio::task::spawn_blocking(move || { - let bytes = prof_ctl.dump_pprof()?; - // Symbolize the profile. - // TODO: consider moving this upstream to jemalloc_pprof and avoiding the - // serialization roundtrip. - let profile = pprof::decode(&bytes)?; - let profile = pprof::symbolize(profile)?; - let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS); - pprof::encode(&profile) - }) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? 
- .map_err(ApiError::InternalServerError)?; + let data = tokio::task::spawn_blocking(move || prof_ctl.dump_pprof()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? + .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "application/octet-stream") - .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb\"") + .header(CONTENT_DISPOSITION, "attachment; filename=\"heap.pb.gz\"") .body(Body::from(data)) .map_err(|err| ApiError::InternalServerError(err.into())) } Format::Svg => { - let body = tokio::task::spawn_blocking(move || { - let bytes = prof_ctl.dump_pprof()?; - let profile = pprof::decode(&bytes)?; - let profile = pprof::symbolize(profile)?; - let profile = pprof::strip_locations(profile, STRIP_MAPPINGS, &STRIP_FUNCTIONS); - let mut opts = inferno::flamegraph::Options::default(); - opts.title = "Heap inuse".to_string(); - opts.count_name = "bytes".to_string(); - pprof::flamegraph(profile, &mut opts) - }) - .await - .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? - .map_err(ApiError::InternalServerError)?; + let svg = tokio::task::spawn_blocking(move || prof_ctl.dump_flamegraph()) + .await + .map_err(|join_err| ApiError::InternalServerError(join_err.into()))? 
+ .map_err(ApiError::InternalServerError)?; Response::builder() .status(200) .header(CONTENT_TYPE, "image/svg+xml") - .body(Body::from(body)) + .body(Body::from(svg)) .map_err(|err| ApiError::InternalServerError(err.into())) } } diff --git a/libs/http-utils/src/lib.rs b/libs/http-utils/src/lib.rs index c692a54257..1e9b3c761a 100644 --- a/libs/http-utils/src/lib.rs +++ b/libs/http-utils/src/lib.rs @@ -2,7 +2,6 @@ pub mod endpoint; pub mod error; pub mod failpoints; pub mod json; -pub mod pprof; pub mod request; extern crate hyper0 as hyper; diff --git a/libs/http-utils/src/pprof.rs b/libs/http-utils/src/pprof.rs deleted file mode 100644 index 529017f350..0000000000 --- a/libs/http-utils/src/pprof.rs +++ /dev/null @@ -1,238 +0,0 @@ -use std::borrow::Cow; -use std::collections::{HashMap, HashSet}; -use std::ffi::c_void; -use std::io::Write as _; - -use anyhow::bail; -use flate2::Compression; -use flate2::write::{GzDecoder, GzEncoder}; -use itertools::Itertools as _; -use pprof::protos::{Function, Line, Location, Message as _, Profile}; -use regex::Regex; - -/// Decodes a gzip-compressed Protobuf-encoded pprof profile. -pub fn decode(bytes: &[u8]) -> anyhow::Result { - let mut gz = GzDecoder::new(Vec::new()); - gz.write_all(bytes)?; - Ok(Profile::parse_from_bytes(&gz.finish()?)?) -} - -/// Encodes a pprof profile as gzip-compressed Protobuf. -pub fn encode(profile: &Profile) -> anyhow::Result> { - let mut gz = GzEncoder::new(Vec::new(), Compression::default()); - profile.write_to_writer(&mut gz)?; - Ok(gz.finish()?) -} - -/// Symbolizes a pprof profile using the current binary. -pub fn symbolize(mut profile: Profile) -> anyhow::Result { - if !profile.function.is_empty() { - return Ok(profile); // already symbolized - } - - // Collect function names. - let mut functions: HashMap = HashMap::new(); - let mut strings: HashMap = profile - .string_table - .into_iter() - .enumerate() - .map(|(i, s)| (s, i as i64)) - .collect(); - - // Helper to look up or register a string. 
- let mut string_id = |s: &str| -> i64 { - // Don't use .entry() to avoid unnecessary allocations. - if let Some(id) = strings.get(s) { - return *id; - } - let id = strings.len() as i64; - strings.insert(s.to_string(), id); - id - }; - - for loc in &mut profile.location { - if !loc.line.is_empty() { - continue; - } - - // Resolve the line and function for each location. - backtrace::resolve(loc.address as *mut c_void, |symbol| { - let Some(symbol_name) = symbol.name() else { - return; - }; - - let function_name = format!("{symbol_name:#}"); - let functions_len = functions.len(); - let function_id = functions - .entry(function_name) - .or_insert_with_key(|function_name| { - let function_id = functions_len as u64 + 1; - let system_name = String::from_utf8_lossy(symbol_name.as_bytes()); - let filename = symbol - .filename() - .map(|path| path.to_string_lossy()) - .unwrap_or(Cow::Borrowed("")); - Function { - id: function_id, - name: string_id(function_name), - system_name: string_id(&system_name), - filename: string_id(&filename), - ..Default::default() - } - }) - .id; - loc.line.push(Line { - function_id, - line: symbol.lineno().unwrap_or(0) as i64, - ..Default::default() - }); - }); - } - - // Store the resolved functions, and mark the mapping as resolved. - profile.function = functions.into_values().sorted_by_key(|f| f.id).collect(); - profile.string_table = strings - .into_iter() - .sorted_by_key(|(_, i)| *i) - .map(|(s, _)| s) - .collect(); - - for mapping in &mut profile.mapping { - mapping.has_functions = true; - mapping.has_filenames = true; - } - - Ok(profile) -} - -/// Strips locations (stack frames) matching the given mappings (substring) or function names -/// (regex). The function bool specifies whether child frames should be stripped as well. -/// -/// The string definitions are left behind in the profile for simplicity, to avoid rewriting all -/// string references. 
-pub fn strip_locations( - mut profile: Profile, - mappings: &[&str], - functions: &[(Regex, bool)], -) -> Profile { - // Strip mappings. - let mut strip_mappings: HashSet = HashSet::new(); - - profile.mapping.retain(|mapping| { - let Some(name) = profile.string_table.get(mapping.filename as usize) else { - return true; - }; - if mappings.iter().any(|substr| name.contains(substr)) { - strip_mappings.insert(mapping.id); - return false; - } - true - }); - - // Strip functions. - let mut strip_functions: HashMap = HashMap::new(); - - profile.function.retain(|function| { - let Some(name) = profile.string_table.get(function.name as usize) else { - return true; - }; - for (regex, strip_children) in functions { - if regex.is_match(name) { - strip_functions.insert(function.id, *strip_children); - return false; - } - } - true - }); - - // Strip locations. The bool specifies whether child frames should be stripped too. - let mut strip_locations: HashMap = HashMap::new(); - - profile.location.retain(|location| { - for line in &location.line { - if let Some(strip_children) = strip_functions.get(&line.function_id) { - strip_locations.insert(location.id, *strip_children); - return false; - } - } - if strip_mappings.contains(&location.mapping_id) { - strip_locations.insert(location.id, false); - return false; - } - true - }); - - // Strip sample locations. - for sample in &mut profile.sample { - // First, find the uppermost function with child removal and truncate the stack. - if let Some(truncate) = sample - .location_id - .iter() - .rposition(|id| strip_locations.get(id) == Some(&true)) - { - sample.location_id.drain(..=truncate); - } - // Next, strip any individual frames without child removal. - sample - .location_id - .retain(|id| !strip_locations.contains_key(id)); - } - - profile -} - -/// Generates an SVG flamegraph from a symbolized pprof profile. 
-pub fn flamegraph( - profile: Profile, - opts: &mut inferno::flamegraph::Options, -) -> anyhow::Result> { - if profile.mapping.iter().any(|m| !m.has_functions) { - bail!("profile not symbolized"); - } - - // Index locations, functions, and strings. - let locations: HashMap = - profile.location.into_iter().map(|l| (l.id, l)).collect(); - let functions: HashMap = - profile.function.into_iter().map(|f| (f.id, f)).collect(); - let strings = profile.string_table; - - // Resolve stacks as function names, and sum sample values per stack. Also reverse the stack, - // since inferno expects it bottom-up. - let mut stacks: HashMap, i64> = HashMap::new(); - for sample in profile.sample { - let mut stack = Vec::with_capacity(sample.location_id.len()); - for location in sample.location_id.into_iter().rev() { - let Some(location) = locations.get(&location) else { - bail!("missing location {location}"); - }; - for line in location.line.iter().rev() { - let Some(function) = functions.get(&line.function_id) else { - bail!("missing function {}", line.function_id); - }; - let Some(name) = strings.get(function.name as usize) else { - bail!("missing string {}", function.name); - }; - stack.push(name.as_str()); - } - } - let Some(&value) = sample.value.first() else { - bail!("missing value"); - }; - *stacks.entry(stack).or_default() += value; - } - - // Construct stack lines for inferno. - let lines = stacks - .into_iter() - .map(|(stack, value)| (stack.into_iter().join(";"), value)) - .map(|(stack, value)| format!("{stack} {value}")) - .sorted() - .collect_vec(); - - // Construct the flamegraph. 
- let mut bytes = Vec::new(); - let lines = lines.iter().map(|line| line.as_str()); - inferno::flamegraph::from_lines(opts, lines, &mut bytes)?; - Ok(bytes) -} diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index 5020d82adf..ac44300a51 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -15,7 +15,6 @@ arc-swap.workspace = true sentry.workspace = true async-compression.workspace = true anyhow.workspace = true -backtrace.workspace = true bincode.workspace = true bytes.workspace = true camino.workspace = true From 20af9cef17374a287850a866983943bcad579fa2 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 4 Mar 2025 09:55:50 -0500 Subject: [PATCH 082/207] fix(test): use the same value for reldir v1+v2 (#11070) ## Problem part of https://github.com/neondatabase/neon/issues/11067 My observation is that with the current value of settings, x86-v1 usually takes 30s, arm-v1 1m30s, x86-v2 1m, arm-v2 3m. But sometimes the system could run too slow and cause test to timeout on arm with reldir v2. While I investigate what's going on and further improve the performance, I'd like to set both of them to use the same test input, so that it doesn't timeout and we don't abuse this test case as a performance test. ## Summary of changes Use the same settings for both test cases. Signed-off-by: Alex Chi Z --- test_runner/regress/test_pg_regress.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index df243c13f1..d2a78b16e4 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -375,12 +375,8 @@ def test_tx_abort_with_many_relations( # How many relations: this number is tuned to be long enough to take tens of seconds # if the rollback code path is buggy, tripping the test's timeout. 
- if reldir_type == "v1": - n = 4000 - step = 4000 - else: - n = 20000 - step = 5000 + n = 5000 + step = 2500 def create(): # Create many relations From 4bbdb758ec2f4f1552a76b8bce424fa7d0b2fdc7 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 4 Mar 2025 17:39:32 +0100 Subject: [PATCH 083/207] compute_tools: appease unused lint on macOS (#11074) ## Problem On macOS, the `unused` lint complains about two variables not used in `!linux` builds. These were introduced in #11007. ## Summary of changes Appease the linter by explicitly using the variables in `!linux` branches. --- compute_tools/src/compute.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index a89d3345c1..e4d5a6aaba 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -468,6 +468,8 @@ impl ComputeNode { // Kills the actual task running the monitor handle.abort(); } + } else { + _ = vm_monitor; // appease unused lint on macOS } } } @@ -791,6 +793,7 @@ impl ComputeNode { }; StartVmMonitorResult { token, vm_monitor } } else { + _ = disable_lfc_resizing; // appease unused lint on macOS StartVmMonitorResult { } } } From 7b7e4a9fd3b1909b282349939284649bbcb040b0 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Tue, 4 Mar 2025 12:08:00 -0600 Subject: [PATCH 084/207] Authorize compute_ctl requests from the control plane (#10530) The compute should only act if requests come from the control plane. 
Signed-off-by: Tristan Partin Signed-off-by: Tristan Partin --- Cargo.lock | 68 +++++++- Cargo.toml | 3 +- compute_tools/Cargo.toml | 2 + compute_tools/src/bin/compute_ctl.rs | 1 + compute_tools/src/compute.rs | 24 ++- compute_tools/src/http/extract/mod.rs | 2 + compute_tools/src/http/extract/request_id.rs | 86 ++++++++++ compute_tools/src/http/headers.rs | 2 + .../src/http/middleware/authorize.rs | 145 ++++++++++++++++ compute_tools/src/http/middleware/mod.rs | 1 + compute_tools/src/http/mod.rs | 2 + compute_tools/src/http/server.rs | 158 ++++++++++-------- libs/compute_api/src/responses.rs | 4 +- workspace_hack/Cargo.toml | 2 +- 14 files changed, 417 insertions(+), 83 deletions(-) create mode 100644 compute_tools/src/http/extract/request_id.rs create mode 100644 compute_tools/src/http/headers.rs create mode 100644 compute_tools/src/http/middleware/authorize.rs create mode 100644 compute_tools/src/http/middleware/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 030753bca5..772b1f50c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -783,6 +783,28 @@ dependencies = [ "tracing", ] +[[package]] +name = "axum-extra" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fc6f625a1f7705c6cf62d0d070794e94668988b1c38111baeec177c715f7b" +dependencies = [ + "axum", + "axum-core", + "bytes", + "futures-util", + "headers", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "mime", + "pin-project-lite", + "serde", + "tower 0.5.2", + "tower-layer", + "tower-service", +] + [[package]] name = "azure_core" version = "0.21.0" @@ -925,9 +947,9 @@ checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5" [[package]] name = "base64" -version = "0.21.1" +version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f1e31e207a6b8fb791a38ea3105e6cb541f55e4d029902d3039a4ad07cc4105" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" [[package]] name = 
"base64" @@ -1305,6 +1327,7 @@ dependencies = [ "aws-sdk-s3", "aws-smithy-types", "axum", + "axum-extra", "base64 0.13.1", "bytes", "camino", @@ -1316,6 +1339,7 @@ dependencies = [ "flate2", "futures", "http 1.1.0", + "jsonwebtoken", "metrics", "nix 0.27.1", "notify", @@ -2297,7 +2321,7 @@ name = "framed-websockets" version = "0.1.0" source = "git+https://github.com/neondatabase/framed-websockets#34eff3d6f8cfccbc5f35e4f65314ff7328621127" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", "bytemuck", "bytes", "futures-core", @@ -2653,7 +2677,7 @@ version = "7.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", "byteorder", "crossbeam-channel", "flate2", @@ -2661,6 +2685,30 @@ dependencies = [ "num-traits", ] +[[package]] +name = "headers" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322106e6bd0cba2d5ead589ddb8150a13d7c4217cf80d7c4f682ca994ccc6aa9" +dependencies = [ + "base64 0.21.7", + "bytes", + "headers-core", + "http 1.1.0", + "httpdate", + "mime", + "sha1", +] + +[[package]] +name = "headers-core" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" +dependencies = [ + "http 1.1.0", +] + [[package]] name = "heck" version = "0.5.0" @@ -3385,7 +3433,7 @@ version = "9.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c7ea04a7c5c055c175f189b6dc6ba036fd62306b58c66c9f6389036c503a3f4" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", "js-sys", "pem", "ring", @@ -4467,7 +4515,7 @@ version = "3.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b8fcc794035347fb64beda2d3b462595dd2753e3f268d89c5aae77e8cf2c310" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", "serde", ] @@ -5814,7 
+5862,7 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d194b56d58803a43635bdc398cd17e383d6f71f9182b9a192c127ca42494a59b" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", ] [[package]] @@ -5823,7 +5871,7 @@ version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f48172685e6ff52a556baa527774f61fcaa884f59daf3375c62a3f1cd2549dab" dependencies = [ - "base64 0.21.1", + "base64 0.21.7", "rustls-pki-types", ] @@ -7357,10 +7405,12 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "403fa3b783d4b626a8ad51d766ab03cb6d2dbfc46b1c5d4448395e6628dc9697" dependencies = [ + "base64 0.22.1", "bitflags 2.8.0", "bytes", "http 1.1.0", "http-body 1.0.0", + "mime", "pin-project-lite", "tower-layer", "tower-service", @@ -8267,7 +8317,7 @@ dependencies = [ "ahash", "anyhow", "base64 0.13.1", - "base64 0.21.1", + "base64 0.21.7", "base64ct", "bytes", "camino", diff --git a/Cargo.toml b/Cargo.toml index 2303723e43..d11fe4f449 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -67,6 +67,7 @@ aws-credential-types = "1.2.0" aws-sigv4 = { version = "1.2", features = ["sign-http"] } aws-types = "1.3" axum = { version = "0.8.1", features = ["ws"] } +axum-extra = { version = "0.10.0", features = ["typed-header"] } base64 = "0.13.0" bincode = "1.3" bindgen = "0.71" @@ -191,7 +192,7 @@ toml = "0.8" toml_edit = "0.22" tonic = {version = "0.12.3", default-features = false, features = ["channel", "tls", "tls-roots"]} tower = { version = "0.5.2", default-features = false } -tower-http = { version = "0.6.2", features = ["request-id", "trace"] } +tower-http = { version = "0.6.2", features = ["auth", "request-id", "trace"] } # This revision uses opentelemetry 0.27. There's no tag for it. 
tower-otel = { git = "https://github.com/mattiapenati/tower-otel", rev = "56a7321053bcb72443888257b622ba0d43a11fcd" } diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index 8f3bcbeef8..dd2896714d 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -17,6 +17,7 @@ aws-sdk-kms.workspace = true aws-smithy-types.workspace = true anyhow.workspace = true axum = { workspace = true, features = [] } +axum-extra.workspace = true camino.workspace = true chrono.workspace = true cfg-if.workspace = true @@ -25,6 +26,7 @@ fail.workspace = true flate2.workspace = true futures.workspace = true http.workspace = true +jsonwebtoken.workspace = true metrics.workspace = true nix.workspace = true notify.workspace = true diff --git a/compute_tools/src/bin/compute_ctl.rs b/compute_tools/src/bin/compute_ctl.rs index 08966a6efb..fc7a3e2827 100644 --- a/compute_tools/src/bin/compute_ctl.rs +++ b/compute_tools/src/bin/compute_ctl.rs @@ -179,6 +179,7 @@ fn main() -> Result<()> { live_config_allowed: cli_spec.live_config_allowed, }, cli_spec.spec, + cli_spec.compute_ctl_config, )?; let exit_code = compute_node.run()?; diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index e4d5a6aaba..d0b1bc2534 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -11,7 +11,7 @@ use std::{env, fs}; use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use compute_api::privilege::Privilege; -use compute_api::responses::{ComputeMetrics, ComputeStatus}; +use compute_api::responses::{ComputeCtlConfig, ComputeMetrics, ComputeStatus}; use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent}; use futures::StreamExt; use futures::future::join_all; @@ -132,6 +132,8 @@ pub struct ComputeState { /// passed by the control plane with a /configure HTTP request. 
pub pspec: Option, + pub compute_ctl_config: ComputeCtlConfig, + /// If the spec is passed by a /configure request, 'startup_span' is the /// /configure request's tracing span. The main thread enters it when it /// processes the compute startup, so that the compute startup is considered @@ -155,6 +157,7 @@ impl ComputeState { last_active: None, error: None, pspec: None, + compute_ctl_config: ComputeCtlConfig::default(), startup_span: None, metrics: ComputeMetrics::default(), } @@ -365,7 +368,11 @@ pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { } impl ComputeNode { - pub fn new(params: ComputeNodeParams, cli_spec: Option) -> Result { + pub fn new( + params: ComputeNodeParams, + cli_spec: Option, + compute_ctl_config: ComputeCtlConfig, + ) -> Result { let connstr = params.connstr.as_str(); let conn_conf = postgres::config::Config::from_str(connstr) .context("cannot build postgres config from connstr")?; @@ -377,6 +384,7 @@ impl ComputeNode { let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?; new_state.pspec = Some(pspec); } + new_state.compute_ctl_config = compute_ctl_config; Ok(ComputeNode { params, @@ -405,11 +413,19 @@ impl ComputeNode { // Launch the external HTTP server first, so that we can serve control plane // requests while configuration is still in progress. - crate::http::server::Server::External(this.params.external_http_port).launch(&this); + crate::http::server::Server::External { + port: this.params.external_http_port, + jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(), + compute_id: this.params.compute_id.clone(), + } + .launch(&this); // The internal HTTP server could be launched later, but there isn't much // sense in waiting. - crate::http::server::Server::Internal(this.params.internal_http_port).launch(&this); + crate::http::server::Server::Internal { + port: this.params.internal_http_port, + } + .launch(&this); // If we got a spec from the CLI already, use that. 
Otherwise wait for the // control plane to pass it to us with a /configure HTTP request diff --git a/compute_tools/src/http/extract/mod.rs b/compute_tools/src/http/extract/mod.rs index 1b690e444d..589681cfe2 100644 --- a/compute_tools/src/http/extract/mod.rs +++ b/compute_tools/src/http/extract/mod.rs @@ -1,7 +1,9 @@ pub(crate) mod json; pub(crate) mod path; pub(crate) mod query; +pub(crate) mod request_id; pub(crate) use json::Json; pub(crate) use path::Path; pub(crate) use query::Query; +pub(crate) use request_id::RequestId; diff --git a/compute_tools/src/http/extract/request_id.rs b/compute_tools/src/http/extract/request_id.rs new file mode 100644 index 0000000000..d911921a05 --- /dev/null +++ b/compute_tools/src/http/extract/request_id.rs @@ -0,0 +1,86 @@ +use std::{ + fmt::Display, + ops::{Deref, DerefMut}, +}; + +use axum::{extract::FromRequestParts, response::IntoResponse}; +use http::{StatusCode, request::Parts}; + +use crate::http::{JsonResponse, headers::X_REQUEST_ID}; + +/// Extract the request ID from the `X-Request-Id` header. +#[derive(Debug, Clone, Default)] +pub(crate) struct RequestId(pub String); + +#[derive(Debug)] +/// Rejection used for [`RequestId`]. +/// +/// Contains one variant for each way the [`RequestId`] extractor can +/// fail. +pub(crate) enum RequestIdRejection { + /// The request is missing the header. + MissingRequestId, + + /// The value of the header is invalid UTF-8. 
+ InvalidUtf8, +} + +impl RequestIdRejection { + pub fn status(&self) -> StatusCode { + match self { + RequestIdRejection::MissingRequestId => StatusCode::INTERNAL_SERVER_ERROR, + RequestIdRejection::InvalidUtf8 => StatusCode::BAD_REQUEST, + } + } + + pub fn message(&self) -> String { + match self { + RequestIdRejection::MissingRequestId => "request ID is missing", + RequestIdRejection::InvalidUtf8 => "request ID is invalid UTF-8", + } + .to_string() + } +} + +impl IntoResponse for RequestIdRejection { + fn into_response(self) -> axum::response::Response { + JsonResponse::error(self.status(), self.message()) + } +} + +impl FromRequestParts for RequestId +where + S: Send + Sync, +{ + type Rejection = RequestIdRejection; + + async fn from_request_parts(parts: &mut Parts, _state: &S) -> Result { + match parts.headers.get(X_REQUEST_ID) { + Some(value) => match value.to_str() { + Ok(request_id) => Ok(Self(request_id.to_string())), + Err(_) => Err(RequestIdRejection::InvalidUtf8), + }, + None => Err(RequestIdRejection::MissingRequestId), + } + } +} + +impl Deref for RequestId { + type Target = String; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for RequestId { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Display for RequestId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} diff --git a/compute_tools/src/http/headers.rs b/compute_tools/src/http/headers.rs new file mode 100644 index 0000000000..a11638e203 --- /dev/null +++ b/compute_tools/src/http/headers.rs @@ -0,0 +1,2 @@ +/// Constant for `X-Request-Id` header. 
+pub const X_REQUEST_ID: &str = "x-request-id"; diff --git a/compute_tools/src/http/middleware/authorize.rs b/compute_tools/src/http/middleware/authorize.rs new file mode 100644 index 0000000000..798dd1179b --- /dev/null +++ b/compute_tools/src/http/middleware/authorize.rs @@ -0,0 +1,145 @@ +use std::{collections::HashSet, net::SocketAddr}; + +use anyhow::{Result, anyhow}; +use axum::{RequestExt, body::Body, extract::ConnectInfo}; +use axum_extra::{ + TypedHeader, + headers::{Authorization, authorization::Bearer}, +}; +use futures::future::BoxFuture; +use http::{Request, Response, StatusCode}; +use jsonwebtoken::{Algorithm, DecodingKey, TokenData, Validation, jwk::JwkSet}; +use serde::Deserialize; +use tower_http::auth::AsyncAuthorizeRequest; +use tracing::warn; + +use crate::http::{JsonResponse, extract::RequestId}; + +#[derive(Clone, Debug, Deserialize)] +pub(in crate::http) struct Claims { + compute_id: String, +} + +#[derive(Clone, Debug)] +pub(in crate::http) struct Authorize { + compute_id: String, + jwks: JwkSet, + validation: Validation, +} + +impl Authorize { + pub fn new(compute_id: String, jwks: JwkSet) -> Self { + let mut validation = Validation::new(Algorithm::EdDSA); + // Nothing is currently required + validation.required_spec_claims = HashSet::new(); + validation.validate_exp = true; + // Unused by the control plane + validation.validate_aud = false; + // Unused by the control plane + validation.validate_nbf = false; + + Self { + compute_id, + jwks, + validation, + } + } +} + +impl AsyncAuthorizeRequest for Authorize { + type RequestBody = Body; + type ResponseBody = Body; + type Future = BoxFuture<'static, Result, Response>>; + + fn authorize(&mut self, mut request: Request) -> Self::Future { + let compute_id = self.compute_id.clone(); + let jwks = self.jwks.clone(); + let validation = self.validation.clone(); + + Box::pin(async move { + let request_id = request.extract_parts::().await.unwrap(); + + // TODO: Remove this check after a successful 
rollout + if jwks.keys.is_empty() { + warn!(%request_id, "Authorization has not been configured"); + + return Ok(request); + } + + let connect_info = request + .extract_parts::>() + .await + .unwrap(); + + // In the event the request is coming from the loopback interface, + // allow all requests + if connect_info.ip().is_loopback() { + warn!(%request_id, "Bypassed authorization because request is coming from the loopback interface"); + + return Ok(request); + } + + let TypedHeader(Authorization(bearer)) = request + .extract_parts::>>() + .await + .map_err(|_| { + JsonResponse::error(StatusCode::BAD_REQUEST, "invalid authorization token") + })?; + + let data = match Self::verify(&jwks, bearer.token(), &validation) { + Ok(claims) => claims, + Err(e) => return Err(JsonResponse::error(StatusCode::UNAUTHORIZED, e)), + }; + + if data.claims.compute_id != compute_id { + return Err(JsonResponse::error( + StatusCode::UNAUTHORIZED, + "invalid claims in authorization token", + )); + } + + // Make claims available to any subsequent middleware or request + // handlers + request.extensions_mut().insert(data.claims); + + Ok(request) + }) + } +} + +impl Authorize { + /// Verify the token using the JSON Web Key set and return the token data. 
+ fn verify(jwks: &JwkSet, token: &str, validation: &Validation) -> Result> { + debug_assert!(!jwks.keys.is_empty()); + + for jwk in jwks.keys.iter() { + let decoding_key = match DecodingKey::from_jwk(jwk) { + Ok(key) => key, + Err(e) => { + warn!( + "Failed to construct decoding key from {}: {}", + jwk.common.key_id.as_ref().unwrap(), + e + ); + + continue; + } + }; + + match jsonwebtoken::decode::(token, &decoding_key, validation) { + Ok(data) => return Ok(data), + Err(e) => { + warn!( + "Failed to decode authorization token using {}: {}", + jwk.common.key_id.as_ref().unwrap(), + e + ); + + continue; + } + } + } + + Err(anyhow!("Failed to verify authorization token")) + } +} diff --git a/compute_tools/src/http/middleware/mod.rs b/compute_tools/src/http/middleware/mod.rs new file mode 100644 index 0000000000..caeeeedfe5 --- /dev/null +++ b/compute_tools/src/http/middleware/mod.rs @@ -0,0 +1 @@ +pub(in crate::http) mod authorize; diff --git a/compute_tools/src/http/mod.rs b/compute_tools/src/http/mod.rs index d182278174..9ecc1b0093 100644 --- a/compute_tools/src/http/mod.rs +++ b/compute_tools/src/http/mod.rs @@ -7,6 +7,8 @@ use serde::Serialize; use tracing::error; mod extract; +mod headers; +mod middleware; mod routes; pub mod server; diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index 7283401bb5..126fa86d1c 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -10,48 +10,58 @@ use axum::middleware::{self, Next}; use axum::response::{IntoResponse, Response}; use axum::routing::{get, post}; use http::StatusCode; +use jsonwebtoken::jwk::JwkSet; use tokio::net::TcpListener; use tower::ServiceBuilder; -use tower_http::request_id::PropagateRequestIdLayer; -use tower_http::trace::TraceLayer; -use tracing::{Span, debug, error, info}; +use tower_http::{ + auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer, +}; +use tracing::{Span, error, info}; use uuid::Uuid; 
-use super::routes::{ - check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions, - grants, insights, metrics, metrics_json, status, terminate, +use super::{ + headers::X_REQUEST_ID, + middleware::authorize::Authorize, + routes::{ + check_writability, configure, database_schema, dbs_and_roles, extension_server, extensions, + grants, insights, metrics, metrics_json, status, terminate, + }, }; use crate::compute::ComputeNode; -const X_REQUEST_ID: &str = "x-request-id"; - /// `compute_ctl` has two servers: internal and external. The internal server /// binds to the loopback interface and handles communication from clients on /// the compute. The external server is what receives communication from the /// control plane, the metrics scraper, etc. We make the distinction because /// certain routes in `compute_ctl` only need to be exposed to local processes /// like Postgres via the neon extension and local_proxy. -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Debug)] pub enum Server { - Internal(u16), - External(u16), + Internal { + port: u16, + }, + External { + port: u16, + jwks: JwkSet, + compute_id: String, + }, } impl Display for Server { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Server::Internal(_) => f.write_str("internal"), - Server::External(_) => f.write_str("external"), + Server::Internal { .. } => f.write_str("internal"), + Server::External { .. } => f.write_str("external"), } } } -impl From for Router> { - fn from(server: Server) -> Self { +impl From<&Server> for Router> { + fn from(server: &Server) -> Self { let mut router = Router::>::new(); router = match server { - Server::Internal(_) => { + Server::Internal { .. 
} => { router = router .route( "/extension_server/{*filename}", @@ -69,59 +79,71 @@ impl From for Router> { router } - Server::External(_) => router - .route("/check_writability", post(check_writability::is_writable)) - .route("/configure", post(configure::configure)) - .route("/database_schema", get(database_schema::get_schema_dump)) - .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) - .route("/insights", get(insights::get_insights)) - .route("/metrics", get(metrics::get_metrics)) - .route("/metrics.json", get(metrics_json::get_metrics)) - .route("/status", get(status::get_status)) - .route("/terminate", post(terminate::terminate)), + Server::External { + jwks, compute_id, .. + } => { + let unauthenticated_router = + Router::>::new().route("/metrics", get(metrics::get_metrics)); + + let authenticated_router = Router::>::new() + .route("/check_writability", post(check_writability::is_writable)) + .route("/configure", post(configure::configure)) + .route("/database_schema", get(database_schema::get_schema_dump)) + .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) + .route("/insights", get(insights::get_insights)) + .route("/metrics.json", get(metrics_json::get_metrics)) + .route("/status", get(status::get_status)) + .route("/terminate", post(terminate::terminate)) + .layer(AsyncRequireAuthorizationLayer::new(Authorize::new( + compute_id.clone(), + jwks.clone(), + ))); + + router + .merge(unauthenticated_router) + .merge(authenticated_router) + } }; - router.fallback(Server::handle_404).method_not_allowed_fallback(Server::handle_405).layer( - ServiceBuilder::new() - // Add this middleware since we assume the request ID exists - .layer(middleware::from_fn(maybe_add_request_id_header)) - .layer( - TraceLayer::new_for_http() - .on_request(|request: &http::Request<_>, _span: &Span| { - let request_id = request - .headers() - .get(X_REQUEST_ID) - .unwrap() - .to_str() - .unwrap(); - - match request.uri().path() { - "/metrics" => { - 
debug!(%request_id, "{} {}", request.method(), request.uri()) - } - _ => info!(%request_id, "{} {}", request.method(), request.uri()), - }; - }) - .on_response( - |response: &http::Response<_>, latency: Duration, _span: &Span| { - let request_id = response + router + .fallback(Server::handle_404) + .method_not_allowed_fallback(Server::handle_405) + .layer( + ServiceBuilder::new() + .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO)) + // Add this middleware since we assume the request ID exists + .layer(middleware::from_fn(maybe_add_request_id_header)) + .layer( + TraceLayer::new_for_http() + .on_request(|request: &http::Request<_>, _span: &Span| { + let request_id = request .headers() .get(X_REQUEST_ID) .unwrap() .to_str() .unwrap(); - info!( - %request_id, - code = response.status().as_u16(), - latency = latency.as_millis() - ) - }, - ), - ) - .layer(PropagateRequestIdLayer::x_request_id()), - ) - .layer(tower_otel::trace::HttpLayer::server(tracing::Level::INFO)) + info!(%request_id, "{} {}", request.method(), request.uri()); + }) + .on_response( + |response: &http::Response<_>, latency: Duration, _span: &Span| { + let request_id = response + .headers() + .get(X_REQUEST_ID) + .unwrap() + .to_str() + .unwrap(); + + info!( + %request_id, + code = response.status().as_u16(), + latency = latency.as_millis() + ); + }, + ), + ) + .layer(PropagateRequestIdLayer::x_request_id()), + ) } } @@ -145,15 +167,15 @@ impl Server { match self { // TODO: Change this to Ipv6Addr::LOCALHOST when the GitHub runners // allow binding to localhost - Server::Internal(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED), - Server::External(_) => IpAddr::from(Ipv6Addr::UNSPECIFIED), + Server::Internal { .. } => IpAddr::from(Ipv6Addr::UNSPECIFIED), + Server::External { .. } => IpAddr::from(Ipv6Addr::UNSPECIFIED), } } - fn port(self) -> u16 { + fn port(&self) -> u16 { match self { - Server::Internal(port) => port, - Server::External(port) => port, + Server::Internal { port, .. 
} => *port, + Server::External { port, .. } => *port, } } @@ -180,7 +202,9 @@ impl Server { ); } - let router = Router::from(self).with_state(compute); + let router = Router::from(&self) + .with_state(compute) + .into_make_service_with_connect_info::(); if let Err(e) = axum::serve(listener, router).await { error!("compute_ctl {} HTTP server error: {}", self, e); diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 35c580bd37..3300fbf7dd 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -134,8 +134,10 @@ pub struct CatalogObjects { pub databases: Vec, } -#[derive(Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize)] pub struct ComputeCtlConfig { + /// Set of JSON web keys that the compute can use to authenticate + /// communication from the control plane. pub jwks: JwkSet, } diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 1b7c376560..183cc66ab9 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -18,7 +18,7 @@ license.workspace = true ahash = { version = "0.8" } anyhow = { version = "1", features = ["backtrace"] } base64-594e8ee84c453af0 = { package = "base64", version = "0.13", features = ["alloc"] } -base64-647d43efb71741da = { package = "base64", version = "0.21", features = ["alloc"] } +base64-647d43efb71741da = { package = "base64", version = "0.21" } base64ct = { version = "1", default-features = false, features = ["std"] } bytes = { version = "1", features = ["serde"] } camino = { version = "1", default-features = false, features = ["serde1"] } From f62ddb11ed8883842fecf44ddd85594c2562856b Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Tue, 4 Mar 2025 19:11:43 +0100 Subject: [PATCH 085/207] Distinguish manually submitted runs for periodic pagebench in grafana dashboard (#11079) ## Problem Periodic pagebench workflow runs periodically from latest main commit and also allows to dispatch it manually for a given 
commit hash to bi-sect regressions. However, in the dashboards we cannot distinguish manual runs from periodic runs, which makes it harder to follow the trend. ## Summary of changes Send an additional flag commit type to the benchmark runner instance to distinguish the run type. Note: this needs a follow-up PR on the receiving side. --- .github/workflows/periodic_pagebench.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index af877029e4..0622faba33 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -78,8 +78,10 @@ jobs: run: | if [ -z "$INPUT_COMMIT_HASH" ]; then echo "COMMIT_HASH=$(curl -s https://api.github.com/repos/neondatabase/neon/commits/main | jq -r '.sha')" >> $GITHUB_ENV + echo "COMMIT_HASH_TYPE=latest" >> $GITHUB_ENV else echo "COMMIT_HASH=$INPUT_COMMIT_HASH" >> $GITHUB_ENV + echo "COMMIT_HASH_TYPE=manual" >> $GITHUB_ENV fi - name: Start Bench with run_id @@ -89,7 +91,7 @@ jobs: -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -H "Authorization: Bearer $API_KEY" \ - -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\"}" + -d "{\"neonRepoCommitHash\": \"${COMMIT_HASH}\", \"neonRepoCommitHashType\": \"${COMMIT_HASH_TYPE}\"}" - name: Poll Test Status id: poll_step From 438f7bb72697a7e373448f572f781d3d81dde960 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 5 Mar 2025 12:03:09 +0200 Subject: [PATCH 086/207] Check response status in prefetch_lookup (#11080) ## Problem New async prefetch introduces `prefetch_lookupv` function which is called before LFC lookup to check if prefetch request is already completed. This function currently does not check that the response is actually `T_NeonGetPageResponse` (and not an error). ## Summary of changes Add checks for response tag. 
--------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/pagestore_smgr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index fe463fd4a6..0414661a5f 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -1026,6 +1026,19 @@ prefetch_lookupv(NRelFileInfo rinfo, ForkNumber forknum, BlockNumber blocknum, n if (!neon_prefetch_response_usable(&lsns[i], slot)) continue; + /* + * Ignore errors + */ + if (slot->response->tag != T_NeonGetPageResponse) + { + if (slot->response->tag != T_NeonErrorResponse) + { + NEON_PANIC_CONNECTION_STATE(slot->shard_no, PANIC, + "Expected GetPage (0x%02x) or Error (0x%02x) response to GetPageRequest, but got 0x%02x", + T_NeonGetPageResponse, T_NeonErrorResponse, slot->response->tag); + } + continue; + } memcpy(buffers[i], ((NeonGetPageResponse*)slot->response)->page, BLCKSZ); prefetch_set_unused(ring_index); BITMAP_SET(mask, i); From 906d7468cc15248f3ebce3307370504bd07452e7 Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Wed, 5 Mar 2025 11:14:51 +0100 Subject: [PATCH 087/207] exclude separate perf tests from bench step (#11084) ## Problem Our benchmarking workflow has a job step `bench` which runs all tests in test_runner/performance/* except those that we want to run separately. We recently added two test cases to that testcase directory that we want to run separately but forgot to ignore them during the bench step. This is now causing [failures](https://github.com/neondatabase/neon/actions/runs/13667689340/job/38212087331#step:7:392). ## Summary of changes Ignore the separately run tests in the bench step. 
--- .github/workflows/benchmarking.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index ffb6c65af9..ff7db02e42 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -141,6 +141,8 @@ jobs: --ignore test_runner/performance/test_physical_replication.py --ignore test_runner/performance/test_perf_ingest_using_pgcopydb.py --ignore test_runner/performance/test_cumulative_statistics_persistence.py + --ignore test_runner/performance/test_perf_many_relations.py + --ignore test_runner/performance/test_perf_oltp_large_tenant.py env: BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" From 8e51bfc59767a25b7dce58516353dfb9635ea710 Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Wed, 5 Mar 2025 10:27:46 +0000 Subject: [PATCH 088/207] proxy: JSON logging field refactor (#11078) ## Problem Grafana Loki's JSON handling is somewhat limited and the log message should be structured in a way that it's easy to sift through logs and filter. ## Summary of changes * Drop span_id. It's too short lived to be of value and only bloats the logs. * Use the span's name as the object key, but append a unique numeric value to prevent name collisions. * Extract interesting span fields into a separate object at the root. 
New format: ```json { "timestamp": "2025-03-04T18:54:44.134435Z", "level": "INFO", "message": "connected to compute node at 127.0.0.1 (127.0.0.1:5432) latency=client: 22.002292ms, cplane: 0ns, compute: 5.338875ms, retry: 0ns", "fields": { "cold_start_info": "unknown" }, "process_id": 56675, "thread_id": 9122892, "task_id": "24", "target": "proxy::compute", "src": "proxy/src/compute.rs:288", "trace_id": "5eb89b840ec63fee5fc56cebd633e197", "spans": { "connect_request#1": { "ep": "endpoint", "role": "proxy", "session_id": "b8a41818-12bd-4c3f-8ef0-9a942cc99514", "protocol": "tcp", "conn_info": "127.0.0.1" }, "connect_to_compute#6": {}, "connect_once#8": { "compute_id": "compute", "pid": "853" } }, "extract": { "session_id": "b8a41818-12bd-4c3f-8ef0-9a942cc99514" } } ``` --- Cargo.lock | 8 +- proxy/Cargo.toml | 2 +- proxy/src/logging.rs | 250 +++++++++++++++++++++++++++++++++++-------- 3 files changed, 209 insertions(+), 51 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 772b1f50c6..7aa9c53e7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4385,9 +4385,9 @@ dependencies = [ [[package]] name = "papaya" -version = "0.1.8" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc7c76487f7eaa00a0fc1d7f88dc6b295aec478d11b0fc79f857b62c2874124c" +checksum = "aab21828b6b5952fdadd6c377728ffae53ec3a21b2febc47319ab65741f7e2fd" dependencies = [ "equivalent", "seize", @@ -6110,9 +6110,9 @@ dependencies = [ [[package]] name = "seize" -version = "0.4.9" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d84b0c858bdd30cb56f5597f8b3bf702ec23829e652cc636a1e5a7b9de46ae93" +checksum = "e4b8d813387d566f627f3ea1b914c068aac94c40ae27ec43f5f33bde65abefe7" dependencies = [ "libc", "windows-sys 0.52.0", diff --git a/proxy/Cargo.toml b/proxy/Cargo.toml index 5964b76ecf..b6e3f03a81 100644 --- a/proxy/Cargo.toml +++ b/proxy/Cargo.toml @@ -53,7 +53,7 @@ measured = { workspace = true, features = ["lasso"] } 
metrics.workspace = true once_cell.workspace = true opentelemetry = { workspace = true, features = ["trace"] } -papaya = "0.1.8" +papaya = "0.2.0" parking_lot.workspace = true parquet.workspace = true parquet_derive.workspace = true diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index 3c34918d84..b2e95a109f 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -1,9 +1,11 @@ use std::cell::{Cell, RefCell}; use std::collections::HashMap; use std::hash::BuildHasher; -use std::{env, io}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::{array, env, fmt, io}; use chrono::{DateTime, Utc}; +use indexmap::IndexSet; use opentelemetry::trace::TraceContextExt; use scopeguard::defer; use serde::ser::{SerializeMap, Serializer}; @@ -17,6 +19,7 @@ use tracing_subscriber::fmt::{FormatEvent, FormatFields}; use tracing_subscriber::layer::{Context, Layer}; use tracing_subscriber::prelude::*; use tracing_subscriber::registry::{LookupSpan, SpanRef}; +use try_lock::TryLock; /// Initialize logging and OpenTelemetry tracing and exporter. /// @@ -46,13 +49,13 @@ pub async fn init() -> anyhow::Result { let otlp_layer = tracing_utils::init_tracing("proxy").await; let json_log_layer = if logfmt == LogFormat::Json { - Some(JsonLoggingLayer { - clock: RealClock, - skipped_field_indices: papaya::HashMap::default(), - writer: StderrWriter { + Some(JsonLoggingLayer::new( + RealClock, + StderrWriter { stderr: std::io::stderr(), }, - }) + ["request_id", "session_id", "conn_id"], + )) } else { None }; @@ -191,13 +194,39 @@ thread_local! { } /// Implements tracing layer to handle events specific to logging. -struct JsonLoggingLayer { +struct JsonLoggingLayer { clock: C, skipped_field_indices: papaya::HashMap, + callsite_ids: papaya::HashMap, writer: W, + // We use a const generic and arrays to bypass one heap allocation. 
+ extract_fields: IndexSet<&'static str>, + _marker: std::marker::PhantomData<[&'static str; F]>, } -impl Layer for JsonLoggingLayer +impl JsonLoggingLayer { + fn new(clock: C, writer: W, extract_fields: [&'static str; F]) -> Self { + JsonLoggingLayer { + clock, + skipped_field_indices: papaya::HashMap::default(), + callsite_ids: papaya::HashMap::default(), + writer, + extract_fields: IndexSet::from_iter(extract_fields), + _marker: std::marker::PhantomData, + } + } + + #[inline] + fn callsite_id(&self, cs: callsite::Identifier) -> CallsiteId { + *self + .callsite_ids + .pin() + .get_or_insert_with(cs, CallsiteId::next) + } +} + +impl Layer + for JsonLoggingLayer where S: Subscriber + for<'a> LookupSpan<'a>, { @@ -211,7 +240,14 @@ where let res: io::Result<()> = REENTRANCY_GUARD.with(move |entered| { if entered.get() { let mut formatter = EventFormatter::new(); - formatter.format(now, event, &ctx, &self.skipped_field_indices)?; + formatter.format::( + now, + event, + &ctx, + &self.skipped_field_indices, + &self.callsite_ids, + &self.extract_fields, + )?; self.writer.make_writer().write_all(formatter.buffer()) } else { entered.set(true); @@ -219,7 +255,14 @@ where EVENT_FORMATTER.with_borrow_mut(move |formatter| { formatter.reset(); - formatter.format(now, event, &ctx, &self.skipped_field_indices)?; + formatter.format::( + now, + event, + &ctx, + &self.skipped_field_indices, + &self.callsite_ids, + &self.extract_fields, + )?; self.writer.make_writer().write_all(formatter.buffer()) }) } @@ -243,13 +286,17 @@ where /// Registers a SpanFields instance as span extension. fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) { + let csid = self.callsite_id(attrs.metadata().callsite()); let span = ctx.span(id).expect("span must exist"); let fields = SpanFields::default(); fields.record_fields(attrs); // This could deadlock when there's a panic somewhere in the tracing // event handling and a read or write guard is still held. 
This includes // the OTel subscriber. - span.extensions_mut().insert(fields); + let mut exts = span.extensions_mut(); + + exts.insert(fields); + exts.insert(csid); } fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) { @@ -265,6 +312,7 @@ where /// wins. fn register_callsite(&self, metadata: &'static Metadata<'static>) -> Interest { if !metadata.is_event() { + self.callsite_id(metadata.callsite()); // Must not be never because we wouldn't get trace and span data. return Interest::always(); } @@ -297,6 +345,26 @@ where } } +#[derive(Copy, Clone, Debug, Default)] +#[repr(transparent)] +struct CallsiteId(u32); + +impl CallsiteId { + #[inline] + fn next() -> Self { + // Start at 1 to reserve 0 for default. + static COUNTER: AtomicU32 = AtomicU32::new(1); + CallsiteId(COUNTER.fetch_add(1, Ordering::Relaxed)) + } +} + +impl fmt::Display for CallsiteId { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + /// Stores span field values recorded during the spans lifetime. #[derive(Default)] struct SpanFields { @@ -448,12 +516,14 @@ impl EventFormatter { self.logline_buffer.clear(); } - fn format( + fn format( &mut self, now: DateTime, event: &Event<'_>, ctx: &Context<'_, S>, skipped_field_indices: &papaya::HashMap, + callsite_ids: &papaya::HashMap, + extract_fields: &IndexSet<&'static str>, ) -> io::Result<()> where S: Subscriber + for<'a> LookupSpan<'a>, @@ -485,6 +555,7 @@ impl EventFormatter { event.record(&mut message_extractor); let mut serializer = message_extractor.into_serializer()?; + // Direct message fields. let mut fields_present = FieldsPresent(false, skipped_field_indices); event.record(&mut fields_present); if fields_present.0 { @@ -494,7 +565,9 @@ impl EventFormatter { )?; } + // TODO: thread-local cache? let pid = std::process::id(); + // Skip adding pid 1 to reduce noise for services running in containers. 
if pid != 1 { serializer.serialize_entry("process_id", &pid)?; } @@ -514,6 +587,7 @@ impl EventFormatter { serializer.serialize_entry("target", meta.target())?; + // Skip adding module if it's the same as target. if let Some(module) = meta.module_path() { if module != meta.target() { serializer.serialize_entry("module", module)?; @@ -540,7 +614,16 @@ impl EventFormatter { } } - serializer.serialize_entry("spans", &SerializableSpanStack(ctx))?; + let stack = SerializableSpans { + ctx, + callsite_ids, + fields: ExtractedSpanFields::<'_, F>::new(extract_fields), + }; + serializer.serialize_entry("spans", &stack)?; + + if stack.fields.has_values() { + serializer.serialize_entry("extract", &stack.fields)?; + } serializer.end() }; @@ -818,15 +901,20 @@ impl tracing::field::Visit for MessageFieldSkipper< } } -/// Serializes the span stack from root to leaf (parent of event) enumerated -/// inside an object where the keys are just the number padded with zeroes -/// to retain sorting order. -// The object is necessary because Loki cannot flatten arrays. -struct SerializableSpanStack<'a, 'b, Span>(&'b Context<'a, Span>) +/// Serializes the span stack from root to leaf (parent of event) as object +/// with the span names as keys. To prevent collision we append a numeric value +/// to the name. Also, collects any span fields we're interested in. Last one +/// wins. 
+struct SerializableSpans<'a, 'ctx, Span, const F: usize> where - Span: Subscriber + for<'lookup> LookupSpan<'lookup>; + Span: Subscriber + for<'lookup> LookupSpan<'lookup>, +{ + ctx: &'a Context<'ctx, Span>, + callsite_ids: &'a papaya::HashMap, + fields: ExtractedSpanFields<'a, F>, +} -impl serde::ser::Serialize for SerializableSpanStack<'_, '_, Span> +impl serde::ser::Serialize for SerializableSpans<'_, '_, Span, F> where Span: Subscriber + for<'lookup> LookupSpan<'lookup>, { @@ -836,9 +924,24 @@ where { let mut serializer = serializer.serialize_map(None)?; - if let Some(leaf_span) = self.0.lookup_current() { - for (i, span) in leaf_span.scope().from_root().enumerate() { - serializer.serialize_entry(&format_args!("{i:02}"), &SerializableSpan(&span))?; + if let Some(leaf_span) = self.ctx.lookup_current() { + for span in leaf_span.scope().from_root() { + // Append a numeric callsite ID to the span name to keep the name unique + // in the JSON object. + let cid = self + .callsite_ids + .pin() + .get(&span.metadata().callsite()) + .copied() + .unwrap_or_default(); + + // Loki turns the # into an underscore during field name concatenation. + serializer.serialize_key(&format_args!("{}#{}", span.metadata().name(), &cid))?; + + serializer.serialize_value(&SerializableSpanFields { + span: &span, + fields: &self.fields, + })?; } } @@ -846,28 +949,79 @@ where } } -/// Serializes a single span. Include the span ID, name and its fields as -/// recorded up to this point. -struct SerializableSpan<'a, 'b, Span>(&'b SpanRef<'a, Span>) -where - Span: for<'lookup> LookupSpan<'lookup>; - -impl serde::ser::Serialize for SerializableSpan<'_, '_, Span> +/// Serializes the span fields as object. 
+struct SerializableSpanFields<'a, 'span, Span, const F: usize> where Span: for<'lookup> LookupSpan<'lookup>, { - fn serialize(&self, serializer: Ser) -> Result + span: &'a SpanRef<'span, Span>, + fields: &'a ExtractedSpanFields<'a, F>, +} + +impl serde::ser::Serialize for SerializableSpanFields<'_, '_, Span, F> +where + Span: for<'lookup> LookupSpan<'lookup>, +{ + fn serialize(&self, serializer: S) -> Result where - Ser: serde::ser::Serializer, + S: serde::ser::Serializer, { let mut serializer = serializer.serialize_map(None)?; - // TODO: the span ID is probably only useful for debugging tracing. - serializer.serialize_entry("span_id", &format_args!("{:016x}", self.0.id().into_u64()))?; - serializer.serialize_entry("span_name", self.0.metadata().name())?; - let ext = self.0.extensions(); + let ext = self.span.extensions(); if let Some(data) = ext.get::() { - for (key, value) in &data.fields.pin() { + for (name, value) in &data.fields.pin() { + serializer.serialize_entry(name, value)?; + // TODO: replace clone with reference, if possible. + self.fields.set(name, value.clone()); + } + } + + serializer.end() + } +} + +struct ExtractedSpanFields<'a, const F: usize> { + names: &'a IndexSet<&'static str>, + // TODO: replace TryLock with something local thread and interior mutability. + // serde API doesn't let us use `mut`. 
+ values: TryLock<([Option; F], bool)>, +} + +impl<'a, const F: usize> ExtractedSpanFields<'a, F> { + fn new(names: &'a IndexSet<&'static str>) -> Self { + ExtractedSpanFields { + names, + values: TryLock::new((array::from_fn(|_| Option::default()), false)), + } + } + + #[inline] + fn set(&self, name: &'static str, value: serde_json::Value) { + if let Some((index, _)) = self.names.get_full(name) { + let mut fields = self.values.try_lock().expect("thread-local use"); + fields.0[index] = Some(value); + fields.1 = true; + } + } + + #[inline] + fn has_values(&self) -> bool { + self.values.try_lock().expect("thread-local use").1 + } +} + +impl serde::ser::Serialize for ExtractedSpanFields<'_, F> { + fn serialize(&self, serializer: S) -> Result + where + S: serde::ser::Serializer, + { + let mut serializer = serializer.serialize_map(None)?; + + let values = self.values.try_lock().expect("thread-local use"); + for (i, value) in values.0.iter().enumerate() { + if let Some(value) = value { + let key = self.names[i]; serializer.serialize_entry(key, value)?; } } @@ -879,6 +1033,7 @@ where #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use std::marker::PhantomData; use std::sync::{Arc, Mutex, MutexGuard}; use assert_json_diff::assert_json_eq; @@ -927,14 +1082,17 @@ mod tests { let log_layer = JsonLoggingLayer { clock: clock.clone(), skipped_field_indices: papaya::HashMap::default(), + callsite_ids: papaya::HashMap::default(), writer: buffer.clone(), + extract_fields: IndexSet::from_iter(["x"]), + _marker: PhantomData::<[&'static str; 1]>, }; let registry = tracing_subscriber::Registry::default().with(log_layer); tracing::subscriber::with_default(registry, || { - info_span!("span1", x = 40, x = 41, x = 42).in_scope(|| { - info_span!("span2").in_scope(|| { + info_span!("some_span", x = 24).in_scope(|| { + info_span!("some_span", x = 40, x = 41, x = 42).in_scope(|| { tracing::error!( a = 1, a = 2, @@ -960,16 +1118,16 @@ mod tests { "a": 3, }, "spans": { - "00":{ - 
"span_id": "0000000000000001", - "span_name": "span1", - "x": 42, + "some_span#1":{ + "x": 24, }, - "01": { - "span_id": "0000000000000002", - "span_name": "span2", + "some_span#2": { + "x": 42, } }, + "extract": { + "x": 42, + }, "src": actual.as_object().unwrap().get("src").unwrap().as_str().unwrap(), "target": "proxy::logging::tests", "process_id": actual.as_object().unwrap().get("process_id").unwrap().as_number().unwrap(), From 40aa4d7151029fd0889ecb5f365c87a84d673d06 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 5 Mar 2025 12:23:07 +0100 Subject: [PATCH 089/207] utils: log Sentry initialization (#11077) ## Problem We don't have any logging for Sentry initialization. This makes it hard to verify that it has been configured correctly. ## Summary of changes Log some basic info when Sentry has been initialized, but omit the public key (which allows submitting events). Also log when `SENTRY_DSN` isn't specified at all, and when it fails to initialize (which is supposed to panic, but we may as well). 
--- libs/utils/src/sentry_init.rs | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/libs/utils/src/sentry_init.rs b/libs/utils/src/sentry_init.rs index d77dbba087..72d192a591 100644 --- a/libs/utils/src/sentry_init.rs +++ b/libs/utils/src/sentry_init.rs @@ -3,20 +3,24 @@ use std::env; use sentry::ClientInitGuard; pub use sentry::release_name; +use tracing::{error, info}; #[must_use] pub fn init_sentry( release_name: Option>, extra_options: &[(&str, &str)], ) -> Option { - let dsn = env::var("SENTRY_DSN").ok()?; + let Ok(dsn) = env::var("SENTRY_DSN") else { + info!("not initializing Sentry, no SENTRY_DSN given"); + return None; + }; let environment = env::var("SENTRY_ENVIRONMENT").unwrap_or_else(|_| "development".into()); let guard = sentry::init(( dsn, sentry::ClientOptions { - release: release_name, - environment: Some(environment.into()), + release: release_name.clone(), + environment: Some(environment.clone().into()), ..Default::default() }, )); @@ -25,5 +29,19 @@ pub fn init_sentry( scope.set_extra(key, value.into()); } }); + + if let Some(dsn) = guard.dsn() { + info!( + "initialized Sentry for project {}, environment {}, release {} (using API {})", + dsn.project_id(), + environment, + release_name.unwrap_or(Cow::Borrowed("None")), + dsn.envelope_api_url(), + ); + } else { + // This should panic during sentry::init(), but we may as well cover it. + error!("failed to initialize Sentry, invalid DSN"); + } + Some(guard) } From 38a883118a87214062fb401ba83308a9aeeebe40 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Wed, 5 Mar 2025 11:29:46 +0000 Subject: [PATCH 090/207] Skip dropping tablesync replication slots on the publisher from branch (#11073) fixes https://github.com/neondatabase/cloud/issues/24292 Do not drop tablesync replication slots on the publisher, when we're in the process of dropping subscriptions inherited by a neon branch. Because these slots are still needed by the parent branch subscriptions. 
For regular slots we handle this by setting the slot_name to NONE before calling DROP SUBSCRIPTION, but tablesync slots are not exposed to SQL. rely on GUC disable_logical_replication_subscribers=true to know that we're in the Neon-specific process of dropping subscriptions. --- vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index 6254ab9b44..b1425505c6 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit 6254ab9b4496c3e481bc037ae69d859bbc2bdd7d +Subproject commit b1425505c6f9a622a5aadf3ee362740519993310 diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 9b118b1cff..533be42f7d 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 9b118b1cffa6e4ca0d63389b57b54d11e207e9a8 +Subproject commit 533be42f7da97e614ce1c494fafe3e49f53991b1 diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 799e7a08dd..78050f965f 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 799e7a08dd171aa06a7395dd326f4243aaeb9f93 +Subproject commit 78050f965f2e550fd6e58f837394cb3d080d7d42 diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 517b8dc244..780efda2ef 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 517b8dc244abf3e56f0089849e464af76f70b94e +Subproject commit 780efda2ef8d629495cc289624534ba8cde40779 diff --git a/vendor/revisions.json b/vendor/revisions.json index 8dde46a01e..1a811cfa3d 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17.4", - "517b8dc244abf3e56f0089849e464af76f70b94e" + "780efda2ef8d629495cc289624534ba8cde40779" ], "v16": [ "16.8", - "799e7a08dd171aa06a7395dd326f4243aaeb9f93" + "78050f965f2e550fd6e58f837394cb3d080d7d42" ], "v15": [ "15.12", - 
"9b118b1cffa6e4ca0d63389b57b54d11e207e9a8" + "533be42f7da97e614ce1c494fafe3e49f53991b1" ], "v14": [ "14.17", - "6254ab9b4496c3e481bc037ae69d859bbc2bdd7d" + "b1425505c6f9a622a5aadf3ee362740519993310" ] } From abae7637d6f4fd392c7ad2be15309222d1290d16 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 5 Mar 2025 11:55:55 +0000 Subject: [PATCH 091/207] pageserver: do big reads to fetch slru segment (#11029) ## Problem Each page of the slru segment is fetched individually when it's loaded on demand. ## Summary of Changes Use `Timeline::get_vectored` to fetch 16 at a time. --- pageserver/src/pgdatadir_mapping.rs | 92 +++++++++++++++++++---------- 1 file changed, 61 insertions(+), 31 deletions(-) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 8aa96dd672..e663060d17 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -602,28 +602,36 @@ impl Timeline { let n_blocks = self .get_slru_segment_size(kind, segno, Version::Lsn(lsn), ctx) .await?; - let mut segment = BytesMut::with_capacity(n_blocks as usize * BLCKSZ as usize); - for blkno in 0..n_blocks { - let block = self - .get_slru_page_at_lsn(kind, segno, blkno, lsn, ctx) - .await?; - segment.extend_from_slice(&block[..BLCKSZ as usize]); - } - Ok(segment.freeze()) - } - /// Look up given SLRU page version. 
- pub(crate) async fn get_slru_page_at_lsn( - &self, - kind: SlruKind, - segno: u32, - blknum: BlockNumber, - lsn: Lsn, - ctx: &RequestContext, - ) -> Result { - assert!(self.tenant_shard_id.is_shard_zero()); - let key = slru_block_to_key(kind, segno, blknum); - self.get(key, lsn, ctx).await + let keyspace = KeySpace::single( + slru_block_to_key(kind, segno, 0)..slru_block_to_key(kind, segno, n_blocks), + ); + + let batches = keyspace.partition( + self.get_shard_identity(), + Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64, + ); + + let io_concurrency = IoConcurrency::spawn_from_conf( + self.conf, + self.gate + .enter() + .map_err(|_| PageReconstructError::Cancelled)?, + ); + + let mut segment = BytesMut::with_capacity(n_blocks as usize * BLCKSZ as usize); + for batch in batches.parts { + let blocks = self + .get_vectored(batch, lsn, io_concurrency.clone(), ctx) + .await?; + + for (_key, block) in blocks { + let block = block?; + segment.extend_from_slice(&block[..BLCKSZ as usize]); + } + } + + Ok(segment.freeze()) } /// Get size of an SLRU segment @@ -832,19 +840,41 @@ impl Timeline { let nblocks = self .get_slru_segment_size(SlruKind::Clog, segno, Version::Lsn(probe_lsn), ctx) .await?; - for blknum in (0..nblocks).rev() { - let clog_page = self - .get_slru_page_at_lsn(SlruKind::Clog, segno, blknum, probe_lsn, ctx) + + let keyspace = KeySpace::single( + slru_block_to_key(SlruKind::Clog, segno, 0) + ..slru_block_to_key(SlruKind::Clog, segno, nblocks), + ); + + let batches = keyspace.partition( + self.get_shard_identity(), + Timeline::MAX_GET_VECTORED_KEYS * BLCKSZ as u64, + ); + + let io_concurrency = IoConcurrency::spawn_from_conf( + self.conf, + self.gate + .enter() + .map_err(|_| PageReconstructError::Cancelled)?, + ); + + for batch in batches.parts.into_iter().rev() { + let blocks = self + .get_vectored(batch, probe_lsn, io_concurrency.clone(), ctx) .await?; - if clog_page.len() == BLCKSZ as usize + 8 { - let mut timestamp_bytes = [0u8; 8]; - 
timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]); - let timestamp = TimestampTz::from_be_bytes(timestamp_bytes); + for (_key, clog_page) in blocks.into_iter().rev() { + let clog_page = clog_page?; - match f(timestamp) { - ControlFlow::Break(b) => return Ok(b), - ControlFlow::Continue(()) => (), + if clog_page.len() == BLCKSZ as usize + 8 { + let mut timestamp_bytes = [0u8; 8]; + timestamp_bytes.copy_from_slice(&clog_page[BLCKSZ as usize..]); + let timestamp = TimestampTz::from_be_bytes(timestamp_bytes); + + match f(timestamp) { + ControlFlow::Break(b) => return Ok(b), + ControlFlow::Continue(()) => (), + } } } } From 8c12ccf7291b435bd022bae39b3ea1cd5cced670 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 5 Mar 2025 12:20:18 +0000 Subject: [PATCH 092/207] pageserver: gate previous heatmap behind config flag (#11088) ## Problem On unarchival, we update the previous heatmap with all visible layers. When the primary generates a new heatmap it includes all those layers, so the secondary will download them. Since they're not actually resident on the primary (we didn't call the warm up API), they'll never be evicted, so they remain in the heatmap. This leads to oversized secondary locations like we saw in pre-prod. ## Summary of changes Gate the loading of the previous heatmaps and the heatmap generation on unarchival behind configuration flags. They are disabled by default, but enabled in tests. 
--- libs/pageserver_api/src/config.rs | 6 ++++++ pageserver/src/config.rs | 13 +++++++++++++ pageserver/src/tenant.rs | 6 +++++- test_runner/fixtures/neon_fixtures.py | 7 +++++-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 039cc1319e..f387ff0579 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -123,6 +123,10 @@ pub struct ConfigToml { pub enable_read_path_debugging: Option, #[serde(skip_serializing_if = "Option::is_none")] pub validate_wal_contiguity: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub load_previous_heatmap: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub generate_unarchival_heatmap: Option, } #[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] @@ -523,6 +527,8 @@ impl Default for ConfigToml { None }, validate_wal_contiguity: None, + load_previous_heatmap: None, + generate_unarchival_heatmap: None, } } } diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 64d00882b9..33ae8c4790 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -194,6 +194,13 @@ pub struct PageServerConf { /// Interpreted protocol feature: if enabled, validate that the logical WAL received from /// safekeepers does not have gaps. pub validate_wal_contiguity: bool, + + /// When set, the previously written to disk heatmap is loaded on tenant attach and used + /// to avoid clobbering the heatmap from new, cold, attached locations. + pub load_previous_heatmap: bool, + + /// When set, include visible layers in the next uploaded heatmaps of an unarchived timeline. 
+ pub generate_unarchival_heatmap: bool, } /// Token for authentication to safekeepers @@ -358,6 +365,8 @@ impl PageServerConf { get_vectored_concurrent_io, enable_read_path_debugging, validate_wal_contiguity, + load_previous_heatmap, + generate_unarchival_heatmap, } = config_toml; let mut conf = PageServerConf { @@ -447,6 +456,8 @@ impl PageServerConf { no_sync: no_sync.unwrap_or(false), enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false), validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false), + load_previous_heatmap: load_previous_heatmap.unwrap_or(false), + generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(false), }; // ------------------------------------------------------------ @@ -493,6 +504,8 @@ impl PageServerConf { metric_collection_interval: Duration::from_secs(60), synthetic_size_calculation_interval: Duration::from_secs(60), background_task_maximum_delay: Duration::ZERO, + load_previous_heatmap: Some(true), + generate_unarchival_heatmap: Some(true), ..Default::default() }; PageServerConf::parse_and_validate(NodeId(0), config_toml, &repo_dir).unwrap() diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index fee007b2d7..3694381078 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1150,7 +1150,7 @@ impl Tenant { // a previous heatmap which contains all visible layers in the layer map. // This previous heatmap will be used whenever a fresh heatmap is generated // for the timeline. 
- if matches!(cause, LoadTimelineCause::Unoffload) { + if self.conf.generate_unarchival_heatmap && matches!(cause, LoadTimelineCause::Unoffload) { let mut tline_ending_at = Some((&timeline, timeline.get_last_record_lsn())); while let Some((tline, end_lsn)) = tline_ending_at { let unarchival_heatmap = tline.generate_unarchival_heatmap(end_lsn).await; @@ -1582,6 +1582,10 @@ impl Tenant { } async fn read_on_disk_heatmap(&self) -> Option<(HeatMapTenant, std::time::Instant)> { + if !self.conf.load_previous_heatmap { + return None; + } + let on_disk_heatmap_path = self.conf.tenant_heatmap_path(&self.tenant_shard_id); match tokio::fs::read_to_string(on_disk_heatmap_path).await { Ok(heatmap) => match serde_json::from_str::(&heatmap) { diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 3aa018e99e..6171da52a0 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1169,6 +1169,8 @@ class NeonEnv: # Disable pageserver disk syncs in tests: when running tests concurrently, this avoids # the pageserver taking a long time to start up due to syncfs flushing other tests' data "no_sync": True, + # Look for gaps in WAL received from safekeepeers + "validate_wal_contiguity": True, } # Batching (https://github.com/neondatabase/neon/issues/9377): @@ -1181,11 +1183,12 @@ class NeonEnv: if config.test_may_use_compatibility_snapshot_binaries: log.info( - "Skipping WAL contiguity validation to avoid forward-compatibility related test failures" + "Skipping prev heatmap settings to avoid forward-compatibility related test failures" ) else: # Look for gaps in WAL received from safekeepeers - ps_cfg["validate_wal_contiguity"] = True + ps_cfg["load_previous_heatmap"] = True + ps_cfg["generate_unarchival_heatmap"] = True get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io if get_vectored_concurrent_io is not None: From 332aae1484ce87f62897f6fa610c565eb85378d6 Mon Sep 17 00:00:00 2001 From: 
Erik Grinaker Date: Wed, 5 Mar 2025 14:50:35 +0100 Subject: [PATCH 093/207] test_runner/regress: speed up `test_check_visibility_map` (#11086) ## Problem `test_check_visibility_map` is the slowest test in CI, and can cause timeouts under particularly slow configurations (`debug` and `without-lfc`). ## Summary of changes * Reduce the `pgbench` scale factor from 10 to 8. * Omit a redundant vacuum during `pgbench` init. * Remove a final `vacuum freeze` + `pg_check_visible` pass, which has questionable value (we've already done a vacuum freeze previously, and we don't flush the compute cache before checking anyway). --- test_runner/regress/test_vm_bits.py | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/test_runner/regress/test_vm_bits.py b/test_runner/regress/test_vm_bits.py index 4865178ca8..b30c02e0e4 100644 --- a/test_runner/regress/test_vm_bits.py +++ b/test_runner/regress/test_vm_bits.py @@ -327,9 +327,9 @@ def test_check_visibility_map(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): log.info(f"pgbench run {i+1}/{PGBENCH_RUNS}") endpoint.safe_psql(f"create database {dbname}") connstr = endpoint.connstr(dbname=dbname) - # pgbench -i will automatically vacuum the tables. This creates the visibility map. - pg_bin.run(["pgbench", "-i", "-s", "10", connstr]) - # Freeze the tuples to set the initial frozen bit. + # Initialize the data set, but don't vacuum yet. + pg_bin.run(["pgbench", "-i", "-s", "8", "-n", connstr]) + # Vacuum to create the visibility map, and freeze the tuples to set the frozen bit. endpoint.safe_psql("vacuum freeze", dbname=dbname) # Run pgbench. pg_bin.run(["pgbench", "-c", "32", "-j", "8", "-T", "10", connstr]) @@ -354,19 +354,3 @@ def test_check_visibility_map(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin): row = cur.fetchone() assert row is not None assert row[0] == 0, f"{row[0]} inconsistent VM pages (frozen)" - - # Vacuum and freeze the tables, and check that the visibility map is still accurate. 
- for dbname in dbnames: - log.info(f"Vacuuming and checking visibility map for {dbname}") - with endpoint.cursor(dbname=dbname) as cur: - cur.execute("vacuum freeze") - - cur.execute("select count(*) from pg_check_visible('pgbench_accounts')") - row = cur.fetchone() - assert row is not None - assert row[0] == 0, f"{row[0]} inconsistent VM pages (visible)" - - cur.execute("select count(*) from pg_check_frozen('pgbench_accounts')") - row = cur.fetchone() - assert row is not None - assert row[0] == 0, f"{row[0]} inconsistent VM pages (frozen)" From 94e6897ead78cd1fca7781fa8e4e6c52a519415a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Wed, 5 Mar 2025 15:28:43 +0100 Subject: [PATCH 094/207] fix(ci): make deploy job depend on pushing images to dev registries (#11089) ## Problem If an image fails to push to dev registries, we shouldn't trigger the deploy job, because that depends on images existing in dev registries. To ensure this is the case, the deploy job needs to depend on pushing to dev registries. ## Summary of changes Make `deploy` depend on `push-neon-image-dev` and `push-compute-image-dev`. 
--- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ac6e0634f0..66758ca49f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1061,7 +1061,7 @@ jobs: exit 1 deploy: - needs: [ check-permissions, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ] + needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, push-neon-image-prod, push-compute-image-prod, meta, build-and-test-locally, trigger-custom-extensions-build-and-wait ] # `!failure() && !cancelled()` is required because the workflow depends on the job that can be skipped: `push-neon-image-prod` and `push-compute-image-prod` if: ${{ contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) && !failure() && !cancelled() }} permissions: From 2d45522fa66e3265d08ab8cb317ee7f47eb31c3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 5 Mar 2025 16:45:43 +0100 Subject: [PATCH 095/207] storcon db: load safekeepers from DB again (#11087) Earlier PR #11041 soft-disabled the loading code for safekeepers from the storcon db. This PR makes us load the safekeepers from the database again, now that we have [JWTs available on staging](https://github.com/neondatabase/neon/pull/11087) and soon on prod. This reverts commit 23fb8053c5904d2ede67e09345de429ab56faefc. 
Part of https://github.com/neondatabase/cloud/issues/24727 --- control_plane/src/local_env.rs | 3 --- control_plane/src/storage_controller.rs | 4 ---- storage_controller/src/main.rs | 5 ----- storage_controller/src/service.rs | 26 +++++++++---------------- test_runner/fixtures/neon_fixtures.py | 7 ------- 5 files changed, 9 insertions(+), 36 deletions(-) diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index da7d7e5469..f4026efbbf 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -165,8 +165,6 @@ pub struct NeonStorageControllerConf { #[serde(with = "humantime_serde")] pub long_reconcile_threshold: Option, - - pub load_safekeepers: bool, } impl NeonStorageControllerConf { @@ -190,7 +188,6 @@ impl Default for NeonStorageControllerConf { max_secondary_lag_bytes: None, heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL, long_reconcile_threshold: None, - load_safekeepers: true, } } } diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 77a9075aa7..16e12f4e02 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -537,10 +537,6 @@ impl StorageController { args.push("--start-as-candidate".to_string()); } - if self.config.load_safekeepers { - args.push("--load-safekeepers".to_string()); - } - if let Some(private_key) = &self.private_key { let claims = Claims::new(None, Scope::PageServerApi); let jwt_token = diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 6ef17c0007..967fb2996f 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -143,10 +143,6 @@ struct Cli { // Flag to use https for requests to pageserver API. 
#[arg(long, default_value = "false")] use_https_pageserver_api: bool, - - /// Whether to load safekeeprs from the database and heartbeat them - #[arg(long, default_value = "false")] - load_safekeepers: bool, } enum StrictMode { @@ -360,7 +356,6 @@ async fn async_main() -> anyhow::Result<()> { start_as_candidate: args.start_as_candidate, http_service_port: args.listen.port() as i32, use_https_pageserver_api: args.use_https_pageserver_api, - load_safekeepers: args.load_safekeepers, }; // Validate that we can connect to the database diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 8fc7f7a0c5..e12bd299ce 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -394,8 +394,6 @@ pub struct Config { pub long_reconcile_threshold: Duration, pub use_https_pageserver_api: bool, - - pub load_safekeepers: bool, } impl From for ApiError { @@ -1412,20 +1410,15 @@ impl Service { .set(nodes.len() as i64); tracing::info!("Loading safekeepers from database..."); - let safekeepers = if config.load_safekeepers { - persistence - .list_safekeepers() - .await? - .into_iter() - .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new())) - .collect::>() - } else { - tracing::info!("Skipping safekeeper loading"); - Default::default() - }; - + let safekeepers = persistence + .list_safekeepers() + .await? 
+ .into_iter() + .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new())) + .collect::>(); let safekeepers: HashMap = safekeepers.into_iter().map(|n| (n.get_id(), n)).collect(); + tracing::info!("Loaded {} safekeepers from database.", safekeepers.len()); tracing::info!("Loading shards from database..."); let mut tenant_shard_persistence = persistence.load_active_tenant_shards().await?; @@ -8066,8 +8059,7 @@ impl Service { ) -> Result<(), DatabaseError> { let node_id = NodeId(record.id as u64); self.persistence.safekeeper_upsert(record.clone()).await?; - - if self.config.load_safekeepers { + { let mut locked = self.inner.write().unwrap(); let mut safekeepers = (*locked.safekeepers).clone(); match safekeepers.entry(node_id) { @@ -8099,7 +8091,7 @@ impl Service { .await?; let node_id = NodeId(id as u64); // After the change has been persisted successfully, update the in-memory state - if self.config.load_safekeepers { + { let mut locked = self.inner.write().unwrap(); let mut safekeepers = (*locked.safekeepers).clone(); let sk = safekeepers diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 6171da52a0..ef9d8cb46f 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1133,13 +1133,6 @@ class NeonEnv: if self.storage_controller_config is not None: cfg["storage_controller"] = self.storage_controller_config - # Disable new storcon flag in compat tests - if config.test_may_use_compatibility_snapshot_binaries: - if "storage_controller" in cfg: - cfg["storage_controller"]["load_safekeepers"] = False - else: - cfg["storage_controller"] = {"load_safekeepers": False} - # Create config for pageserver http_auth_type = "NeonJWT" if config.auth_enabled else "Trust" pg_auth_type = "NeonJWT" if config.auth_enabled else "Trust" From 9cdc8c0e6c7adf9bf31ec3cff6f8a978833e528a Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Wed, 5 Mar 2025 10:57:38 -0500 Subject: [PATCH 096/207] feat(pageserver): revisit error types for gc-compaction (#11082) ## Problem part of https://github.com/neondatabase/neon/issues/9114 We used anyhow::Error everywhere and it's time to fix. ## Summary of changes * Make sure that cancel errors are correctly propagated as CompactionError::ShuttingDown. * Skip all the trigger computation work if gc_cutoff is not generated yet. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 182 ++++++++++++++----- 1 file changed, 134 insertions(+), 48 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 76c28e11ab..17f7d96e5e 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -15,7 +15,7 @@ use super::{ Timeline, }; -use anyhow::{Context, anyhow, bail}; +use anyhow::{Context, anyhow}; use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; @@ -234,6 +234,12 @@ impl GcCompactionQueue { // it enough in staging yet. return Ok(()); } + if timeline.get_gc_compaction_watermark() == Lsn::INVALID { + // If the gc watermark is not set, we don't need to trigger auto compaction. + // This check is the same as in `gc_compaction_split_jobs` but we don't log + // here and we can also skip the computation of the trigger condition earlier. + return Ok(()); + } let Ok(permit) = CONCURRENT_GC_COMPACTION_TASKS.clone().try_acquire_owned() else { // Only allow one compaction run at a time. 
TODO: As we do `try_acquire_owned`, we cannot ensure @@ -357,8 +363,7 @@ impl GcCompactionQueue { GcCompactJob::from_compact_options(options.clone()), options.sub_compaction_max_job_size_mb, ) - .await - .map_err(CompactionError::Other)?; + .await?; if jobs.is_empty() { info!("no jobs to run, skipping scheduled compaction task"); self.notify_and_unblock(id); @@ -825,9 +830,7 @@ impl Timeline { .flags .contains(CompactFlags::EnhancedGcBottomMostCompaction) { - self.compact_with_gc(cancel, options, ctx) - .await - .map_err(CompactionError::Other)?; + self.compact_with_gc(cancel, options, ctx).await?; return Ok(CompactionOutcome::Done); } @@ -2345,12 +2348,19 @@ impl Timeline { async fn check_compaction_space( self: &Arc, layer_selection: &[Layer], - ) -> anyhow::Result<()> { - let available_space = self.check_available_space().await?; + ) -> Result<(), CompactionError> { + let available_space = self + .check_available_space() + .await + .map_err(CompactionError::Other)?; let mut remote_layer_size = 0; let mut all_layer_size = 0; for layer in layer_selection { - let needs_download = layer.needs_download().await?; + let needs_download = layer + .needs_download() + .await + .context("failed to check if layer needs download") + .map_err(CompactionError::Other)?; if needs_download.is_some() { remote_layer_size += layer.layer_desc().file_size; } @@ -2359,14 +2369,14 @@ impl Timeline { let allocated_space = (available_space as f64 * 0.8) as u64; /* reserve 20% space for other tasks */ if all_layer_size /* space needed for newly-generated file */ + remote_layer_size /* space for downloading layers */ > allocated_space { - return Err(anyhow!( + return Err(CompactionError::Other(anyhow!( "not enough space for compaction: available_space={}, allocated_space={}, all_layer_size={}, remote_layer_size={}, required_space={}", available_space, allocated_space, all_layer_size, remote_layer_size, all_layer_size + remote_layer_size - )); + ))); } Ok(()) } @@ -2397,7 +2407,7 @@ impl 
Timeline { self: &Arc, job: GcCompactJob, sub_compaction_max_job_size_mb: Option, - ) -> anyhow::Result> { + ) -> Result, CompactionError> { let compact_below_lsn = if job.compact_lsn_range.end != Lsn::MAX { job.compact_lsn_range.end } else { @@ -2548,7 +2558,7 @@ impl Timeline { cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, - ) -> anyhow::Result<()> { + ) -> Result<(), CompactionError> { let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); if sub_compaction { @@ -2580,7 +2590,7 @@ impl Timeline { cancel: &CancellationToken, job: GcCompactJob, ctx: &RequestContext, - ) -> anyhow::Result<()> { + ) -> Result<(), CompactionError> { // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // Note that we already acquired the compaction lock when the outer `compact` function gets called. @@ -2588,8 +2598,7 @@ impl Timeline { let gc_lock = async { tokio::select! 
{ guard = self.gc_lock.lock() => Ok(guard), - // TODO: refactor to CompactionError to correctly pass cancelled error - _ = cancel.cancelled() => Err(anyhow!("cancelled")), + _ = cancel.cancelled() => Err(CompactionError::ShuttingDown), } }; @@ -2810,10 +2819,10 @@ impl Timeline { .map(|layer| layer.layer_desc().layer_name()) .collect_vec(); if let Some(err) = check_valid_layermap(&layer_names) { - bail!( + return Err(CompactionError::Other(anyhow!( "gc-compaction layer map check failed because {}, cannot proceed with compaction due to potential data loss", err - ); + ))); } // The maximum LSN we are processing in this compaction loop let end_lsn = job_desc @@ -2828,11 +2837,24 @@ impl Timeline { let mut total_downloaded_size = 0; let mut total_layer_size = 0; for layer in &job_desc.selected_layers { - if layer.needs_download().await?.is_some() { + if layer + .needs_download() + .await + .context("failed to check if layer needs download") + .map_err(CompactionError::Other)? + .is_some() + { total_downloaded_size += layer.layer_desc().file_size; } total_layer_size += layer.layer_desc().file_size; - let resident_layer = layer.download_and_keep_resident(ctx).await?; + if cancel.is_cancelled() { + return Err(CompactionError::ShuttingDown); + } + let resident_layer = layer + .download_and_keep_resident(ctx) + .await + .context("failed to download and keep resident layer") + .map_err(CompactionError::Other)?; downloaded_layers.push(resident_layer); } info!( @@ -2843,19 +2865,33 @@ impl Timeline { ); for resident_layer in &downloaded_layers { if resident_layer.layer_desc().is_delta() { - let layer = resident_layer.get_as_delta(ctx).await?; + let layer = resident_layer + .get_as_delta(ctx) + .await + .context("failed to get delta layer") + .map_err(CompactionError::Other)?; delta_layers.push(layer); } else { - let layer = resident_layer.get_as_image(ctx).await?; + let layer = resident_layer + .get_as_image(ctx) + .await + .context("failed to get image layer") + 
.map_err(CompactionError::Other)?; image_layers.push(layer); } } - let (dense_ks, sparse_ks) = self.collect_gc_compaction_keyspace().await?; + let (dense_ks, sparse_ks) = self + .collect_gc_compaction_keyspace() + .await + .context("failed to collect gc compaction keyspace") + .map_err(CompactionError::Other)?; let mut merge_iter = FilterIterator::create( MergeIterator::create(&delta_layers, &image_layers, ctx), dense_ks, sparse_ks, - )?; + ) + .context("failed to create filter iterator") + .map_err(CompactionError::Other)?; // Step 2: Produce images+deltas. let mut accumulated_values = Vec::new(); @@ -2874,7 +2910,9 @@ impl Timeline { self.get_compaction_target_size(), ctx, ) - .await?, + .await + .context("failed to create image layer writer") + .map_err(CompactionError::Other)?, ) } else { None @@ -2887,7 +2925,9 @@ impl Timeline { lowest_retain_lsn..end_lsn, self.get_compaction_target_size(), ) - .await?; + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?; #[derive(Default)] struct RewritingLayers { @@ -2927,9 +2967,14 @@ impl Timeline { // the key and LSN range are determined. However, to keep things simple here, we still // create this writer, and discard the writer in the end. - while let Some(((key, lsn, val), desc)) = merge_iter.next_with_trace().await? { + while let Some(((key, lsn, val), desc)) = merge_iter + .next_with_trace() + .await + .context("failed to get next key-value pair") + .map_err(CompactionError::Other)? + { if cancel.is_cancelled() { - return Err(anyhow!("cancelled")); // TODO: refactor to CompactionError and pass cancel error + return Err(CompactionError::ShuttingDown); } if self.shard_identity.is_key_disposable(&key) { // If this shard does not need to store this key, simply skip it. 
@@ -2960,7 +3005,9 @@ impl Timeline { desc.lsn_range.clone(), ctx, ) - .await?, + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?, ); } rewriter.before.as_mut().unwrap() @@ -2975,14 +3022,20 @@ impl Timeline { desc.lsn_range.clone(), ctx, ) - .await?, + .await + .context("failed to create delta layer writer") + .map_err(CompactionError::Other)?, ); } rewriter.after.as_mut().unwrap() } else { unreachable!() }; - rewriter.put_value(key, lsn, val, ctx).await?; + rewriter + .put_value(key, lsn, val, ctx) + .await + .context("failed to put value") + .map_err(CompactionError::Other)?; continue; } match val { @@ -3005,9 +3058,13 @@ impl Timeline { &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, get_ancestor_image(self, *last_key, ctx, has_data_below, lowest_retain_lsn) - .await?, + .await + .context("failed to get ancestor image") + .map_err(CompactionError::Other)?, ) - .await?; + .await + .context("failed to generate key retention") + .map_err(CompactionError::Other)?; retention .pipe_to( *last_key, @@ -3016,7 +3073,9 @@ impl Timeline { &mut stat, ctx, ) - .await?; + .await + .context("failed to pipe to delta layer writer") + .map_err(CompactionError::Other)?; accumulated_values.clear(); *last_key = key; accumulated_values.push((key, lsn, val)); @@ -3034,9 +3093,14 @@ impl Timeline { job_desc.gc_cutoff, &job_desc.retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, - get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn).await?, + get_ancestor_image(self, last_key, ctx, has_data_below, lowest_retain_lsn) + .await + .context("failed to get ancestor image") + .map_err(CompactionError::Other)?, ) - .await?; + .await + .context("failed to generate key retention") + .map_err(CompactionError::Other)?; retention .pipe_to( last_key, @@ -3045,7 +3109,9 @@ impl Timeline { &mut stat, ctx, ) - .await?; + .await + .context("failed to pipe to delta layer writer") + .map_err(CompactionError::Other)?; 
// end: move the above part to the loop body let mut rewrote_delta_layers = Vec::new(); @@ -3053,13 +3119,23 @@ impl Timeline { if let Some(delta_writer_before) = writers.before { let (desc, path) = delta_writer_before .finish(job_desc.compaction_key_range.start, ctx) - .await?; - let layer = Layer::finish_creating(self.conf, self, desc, &path)?; + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)?; + let layer = Layer::finish_creating(self.conf, self, desc, &path) + .context("failed to finish creating delta layer") + .map_err(CompactionError::Other)?; rewrote_delta_layers.push(layer); } if let Some(delta_writer_after) = writers.after { - let (desc, path) = delta_writer_after.finish(key.key_range.end, ctx).await?; - let layer = Layer::finish_creating(self.conf, self, desc, &path)?; + let (desc, path) = delta_writer_after + .finish(key.key_range.end, ctx) + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)?; + let layer = Layer::finish_creating(self.conf, self, desc, &path) + .context("failed to finish creating delta layer") + .map_err(CompactionError::Other)?; rewrote_delta_layers.push(layer); } } @@ -3074,7 +3150,9 @@ impl Timeline { let end_key = job_desc.compaction_key_range.end; writer .finish_with_discard_fn(self, ctx, end_key, discard) - .await? + .await + .context("failed to finish image layer writer") + .map_err(CompactionError::Other)? } else { drop(writer); Vec::new() @@ -3086,7 +3164,9 @@ impl Timeline { let produced_delta_layers = if !dry_run { delta_layer_writer .finish_with_discard_fn(self, ctx, discard) - .await? + .await + .context("failed to finish delta layer writer") + .map_err(CompactionError::Other)? 
} else { drop(delta_layer_writer); Vec::new() @@ -3166,7 +3246,9 @@ impl Timeline { &layer.layer_desc().key_range, &job_desc.compaction_key_range, ) { - bail!("violated constraint: image layer outside of compaction key range"); + return Err(CompactionError::Other(anyhow!( + "violated constraint: image layer outside of compaction key range" + ))); } if !fully_contains( &job_desc.compaction_key_range, @@ -3181,7 +3263,9 @@ impl Timeline { info!( "gc-compaction statistics: {}", - serde_json::to_string(&stat)? + serde_json::to_string(&stat) + .context("failed to serialize gc-compaction statistics") + .map_err(CompactionError::Other)? ); if dry_run { @@ -3220,10 +3304,10 @@ impl Timeline { // the writer, so potentially, we will need a function like `ImageLayerBatchWriter::get_all_pending_layer_keys` to get all the keys that are // in the writer before finalizing the persistent layers. Now we would leave some dangling layers on the disk if the check fails. if let Some(err) = check_valid_layermap(&final_layers) { - bail!( + return Err(CompactionError::Other(anyhow!( "gc-compaction layer map check failed after compaction because {}, compaction result not applied to the layer map due to potential data loss", err - ); + ))); } // Between the sanity check and this compaction update, there could be new layers being flushed, but it should be fine because we only @@ -3275,7 +3359,9 @@ impl Timeline { // find_gc_cutoffs will try accessing things below the cutoff. TODO: ideally, this should // be batched into `schedule_compaction_update`. let disk_consistent_lsn = self.disk_consistent_lsn.load(); - self.schedule_uploads(disk_consistent_lsn, None)?; + self.schedule_uploads(disk_consistent_lsn, None) + .context("failed to schedule uploads") + .map_err(CompactionError::Other)?; // If a layer gets rewritten throughout gc-compaction, we need to keep that layer only in `compact_to` instead // of `compact_from`. 
let compact_from = { From d94fc75cfc154d3b5daafcd337cff497b223fe03 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Wed, 5 Mar 2025 18:01:00 +0000 Subject: [PATCH 097/207] Setup compute_ctl pgaudit and rsyslog (#10615) Setup pgaudit and pgauditlogtofile extensions in compute_ctl when the ComputeAuditLogLevel is set to 'hipaa'. See cloud PR https://github.com/neondatabase/cloud/pull/24568 Add rsyslog setup for compute_ctl. Spin up a rsyslog server in the compute VM, and configure it to send logs to the endpoint specified in AUDIT_LOGGING_ENDPOINT env. --- compute/compute-node.Dockerfile | 10 +++ compute/vm-image-spec-bookworm.yaml | 2 +- compute/vm-image-spec-bullseye.yaml | 2 +- compute_tools/src/compute.rs | 22 ++++- compute_tools/src/config.rs | 56 ++++++++++++- .../compute_rsyslog_template.conf | 10 +++ compute_tools/src/lib.rs | 1 + compute_tools/src/rsyslog.rs | 80 +++++++++++++++++++ compute_tools/src/spec_apply.rs | 45 +++++++++-- control_plane/src/endpoint.rs | 4 +- libs/compute_api/src/spec.rs | 21 +++++ 11 files changed, 241 insertions(+), 12 deletions(-) create mode 100644 compute_tools/src/config_template/compute_rsyslog_template.conf create mode 100644 compute_tools/src/rsyslog.rs diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index c3aecfbdc5..a7e8718ea9 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1933,6 +1933,7 @@ RUN apt update && \ locales \ procps \ ca-certificates \ + rsyslog \ $VERSION_INSTALLS && \ apt clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* && \ localedef -i en_US -c -f UTF-8 -A /usr/share/locale/locale.alias en_US.UTF-8 @@ -1978,6 +1979,15 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neo # Make the libraries we built available RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig +# rsyslog config permissions +RUN chown postgres:postgres /etc/rsyslog.conf && \ + touch /etc/compute_rsyslog.conf && \ + 
chown -R postgres:postgres /etc/compute_rsyslog.conf && \ + # directory for rsyslogd pid file + mkdir /var/run/rsyslogd && \ + chown -R postgres:postgres /var/run/rsyslogd + + ENV LANG=en_US.utf8 USER postgres ENTRYPOINT ["/usr/local/bin/compute_ctl"] diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index ff4c3387d9..74ff3a8b6d 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -54,7 +54,7 @@ files: # regardless of hostname (ALL) # # Also allow it to shut down the VM. The fast_import job does that when it's finished. - postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff + postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd - filename: cgconfig.conf content: | # Configuration for cgroups in VM compute nodes diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index c001040bc9..c1787ab018 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -54,7 +54,7 @@ files: # regardless of hostname (ALL) # # Also allow it to shut down the VM. The fast_import job does that when it's finished. 
- postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff + postgres ALL=(root) NOPASSWD: /neonvm/bin/resize-swap, /neonvm/bin/set-disk-quota, /neonvm/bin/poweroff, /usr/sbin/rsyslogd - filename: cgconfig.conf content: | # Configuration for cgroups in VM compute nodes diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index d0b1bc2534..ddcfe12330 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -12,7 +12,9 @@ use anyhow::{Context, Result}; use chrono::{DateTime, Utc}; use compute_api::privilege::Privilege; use compute_api::responses::{ComputeCtlConfig, ComputeMetrics, ComputeStatus}; -use compute_api::spec::{ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent}; +use compute_api::spec::{ + ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, ExtVersion, PgIdent, +}; use futures::StreamExt; use futures::future::join_all; use futures::stream::FuturesUnordered; @@ -35,6 +37,7 @@ use crate::logger::startup_context_from_env; use crate::lsn_lease::launch_lsn_lease_bg_task_for_static; use crate::monitor::launch_monitor; use crate::pg_helpers::*; +use crate::rsyslog::configure_and_start_rsyslog; use crate::spec::*; use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; @@ -676,6 +679,23 @@ impl ComputeNode { }); } + // Configure and start rsyslog if necessary + if let ComputeAudit::Hipaa = pspec.spec.audit_log_level { + let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string()); + if remote_endpoint.is_empty() { + anyhow::bail!("AUDIT_LOGGING_ENDPOINT is empty"); + } + + let log_directory_path = Path::new(&self.params.pgdata).join("log"); + // TODO: make this more robust + // now rsyslog starts once and there is no monitoring or restart if it fails + configure_and_start_rsyslog( + log_directory_path.to_str().unwrap(), + "hipaa", + &remote_endpoint, + )?; + } + // Launch remaining service threads let 
_monitor_handle = launch_monitor(self); let _configurator_handle = launch_configurator(self); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index ca24ff76b3..0760568ff8 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -1,3 +1,4 @@ +use anyhow::Result; use std::fmt::Write as FmtWrite; use std::fs::{File, OpenOptions}; use std::io; @@ -5,10 +6,11 @@ use std::io::Write; use std::io::prelude::*; use std::path::Path; -use anyhow::Result; -use compute_api::spec::{ComputeMode, ComputeSpec, GenericOption}; +use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption}; -use crate::pg_helpers::{GenericOptionExt, PgOptionsSerialize, escape_conf_value}; +use crate::pg_helpers::{ + GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value, +}; /// Check that `line` is inside a text file and put it there if it is not. /// Create file if it doesn't exist. @@ -138,6 +140,54 @@ pub fn write_postgres_conf( writeln!(file, "# Managed by compute_ctl: end")?; } + // If audit logging is enabled, configure pgaudit. + // + // Note, that this is called after the settings from spec are written. + // This way we always override the settings from the spec + // and don't allow the user or the control plane admin to change them. + if let ComputeAudit::Hipaa = spec.audit_log_level { + writeln!(file, "# Managed by compute_ctl audit settings: begin")?; + // This log level is very verbose + // but this is necessary for HIPAA compliance. + writeln!(file, "pgaudit.log='all'")?; + writeln!(file, "pgaudit.log_parameter=on")?; + // Disable logging of catalog queries + // The catalog doesn't contain sensitive data, so we don't need to audit it. + writeln!(file, "pgaudit.log_catalog=off")?; + // Set log rotation to 5 minutes + // TODO: tune this after performance testing + writeln!(file, "pgaudit.log_rotation_age=5")?; + + // Add audit shared_preload_libraries, if they are not present. 
+ // + // The caller who sets the flag is responsible for ensuring that the necessary + // shared_preload_libraries are present in the compute image, + // otherwise the compute start will fail. + if let Some(libs) = spec.cluster.settings.find("shared_preload_libraries") { + let mut extra_shared_preload_libraries = String::new(); + if !libs.contains("pgaudit") { + extra_shared_preload_libraries.push_str(",pgaudit"); + } + if !libs.contains("pgauditlogtofile") { + extra_shared_preload_libraries.push_str(",pgauditlogtofile"); + } + writeln!( + file, + "shared_preload_libraries='{}{}'", + libs, extra_shared_preload_libraries + )?; + } else { + // Typically, this should be unreacheable, + // because we always set at least some shared_preload_libraries in the spec + // but let's handle it explicitly anyway. + writeln!( + file, + "shared_preload_libraries='neon,pgaudit,pgauditlogtofile'" + )?; + } + writeln!(file, "# Managed by compute_ctl audit settings: end")?; + } + writeln!(file, "neon.extension_server_port={}", extension_server_port)?; if spec.drop_subscriptions_before_start { diff --git a/compute_tools/src/config_template/compute_rsyslog_template.conf b/compute_tools/src/config_template/compute_rsyslog_template.conf new file mode 100644 index 0000000000..bef3c36446 --- /dev/null +++ b/compute_tools/src/config_template/compute_rsyslog_template.conf @@ -0,0 +1,10 @@ +# Load imfile module to read log files +module(load="imfile") + +# Input configuration for log files in the specified directory +# Replace {log_directory} with the directory containing the log files +input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0") +global(workDirectory="/var/log") + +# Forward logs to remote syslog server +*.* @@{remote_endpoint} \ No newline at end of file diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index b08df22134..5c78bbcd02 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -21,6 +21,7 @@ mod 
migration; pub mod monitor; pub mod params; pub mod pg_helpers; +pub mod rsyslog; pub mod spec; mod spec_apply; pub mod swap; diff --git a/compute_tools/src/rsyslog.rs b/compute_tools/src/rsyslog.rs new file mode 100644 index 0000000000..776ff14644 --- /dev/null +++ b/compute_tools/src/rsyslog.rs @@ -0,0 +1,80 @@ +use std::process::Command; +use std::{fs::OpenOptions, io::Write}; + +use anyhow::{Context, Result}; +use tracing::info; + +fn get_rsyslog_pid() -> Option { + let output = Command::new("pgrep") + .arg("rsyslogd") + .output() + .expect("Failed to execute pgrep"); + + if !output.stdout.is_empty() { + let pid = std::str::from_utf8(&output.stdout) + .expect("Invalid UTF-8 in process output") + .trim() + .to_string(); + Some(pid) + } else { + None + } +} + +// Start rsyslogd with the specified configuration file +// If it is already running, do nothing. +fn start_rsyslog(rsyslog_conf_path: &str) -> Result<()> { + let pid = get_rsyslog_pid(); + if let Some(pid) = pid { + info!("rsyslogd is already running with pid: {}", pid); + return Ok(()); + } + + let _ = Command::new("/usr/sbin/rsyslogd") + .arg("-f") + .arg(rsyslog_conf_path) + .arg("-i") + .arg("/var/run/rsyslogd/rsyslogd.pid") + .output() + .context("Failed to start rsyslogd")?; + + // Check that rsyslogd is running + if let Some(pid) = get_rsyslog_pid() { + info!("rsyslogd started successfully with pid: {}", pid); + } else { + return Err(anyhow::anyhow!("Failed to start rsyslogd")); + } + + Ok(()) +} + +pub fn configure_and_start_rsyslog( + log_directory: &str, + tag: &str, + remote_endpoint: &str, +) -> Result<()> { + let config_content: String = format!( + include_str!("config_template/compute_rsyslog_template.conf"), + log_directory = log_directory, + tag = tag, + remote_endpoint = remote_endpoint + ); + + info!("rsyslog config_content: {}", config_content); + + let rsyslog_conf_path = "/etc/compute_rsyslog.conf"; + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + 
.open(rsyslog_conf_path)?; + + file.write_all(config_content.as_bytes())?; + + info!("rsyslog configuration added successfully. Starting rsyslogd"); + + // start the service, using the configuration + start_rsyslog(rsyslog_conf_path)?; + + Ok(()) +} diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index f9a37c5c98..dbc02c8d02 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use anyhow::{Context, Result}; use compute_api::responses::ComputeStatus; -use compute_api::spec::{ComputeFeature, ComputeSpec, Database, PgIdent, Role}; +use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeSpec, Database, PgIdent, Role}; use futures::future::join_all; use tokio::sync::RwLock; use tokio_postgres::Client; @@ -19,10 +19,10 @@ use crate::pg_helpers::{ get_existing_roles_async, }; use crate::spec_apply::ApplySpecPhase::{ - CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateSchemaNeon, - CreateSuperUser, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, - HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, - RunInEachDatabase, + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension, + CreatePgauditlogtofileExtension, CreateSchemaNeon, CreateSuperUser, DisablePostgresDBPgAudit, + DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, HandleNeonExtension, + HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, }; use crate::spec_apply::PerDatabasePhase::{ ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, @@ -277,6 +277,19 @@ impl ComputeNode { phases.push(FinalizeDropLogicalSubscriptions); } + // Keep DisablePostgresDBPgAudit phase at the end, + // so that all config operations are audit logged. 
+ match spec.audit_log_level + { + ComputeAudit::Hipaa => { + phases.push(CreatePgauditExtension); + phases.push(CreatePgauditlogtofileExtension); + phases.push(DisablePostgresDBPgAudit); + } + ComputeAudit::Log => { /* not implemented yet */ } + ComputeAudit::Disabled => {} + } + for phase in phases { debug!("Applying phase {:?}", &phase); apply_operations( @@ -463,6 +476,9 @@ pub enum ApplySpecPhase { CreateAndAlterDatabases, CreateSchemaNeon, RunInEachDatabase { db: DB, subphase: PerDatabasePhase }, + CreatePgauditExtension, + CreatePgauditlogtofileExtension, + DisablePostgresDBPgAudit, HandleOtherExtensions, HandleNeonExtension, CreateAvailabilityCheck, @@ -1098,6 +1114,25 @@ async fn get_operations<'a>( } Ok(Box::new(empty())) } + ApplySpecPhase::CreatePgauditExtension => Ok(Box::new(once(Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS pgaudit"), + comment: Some(String::from("create pgaudit extensions")), + }))), + ApplySpecPhase::CreatePgauditlogtofileExtension => Ok(Box::new(once(Operation { + query: String::from("CREATE EXTENSION IF NOT EXISTS pgauditlogtofile"), + comment: Some(String::from("create pgauditlogtofile extensions")), + }))), + // Disable pgaudit logging for postgres database. + // Postgres is neon system database used by monitors + // and compute_ctl tuning functions and thus generates a lot of noise. + // We do not consider data stored in this database as sensitive. 
+ ApplySpecPhase::DisablePostgresDBPgAudit => { + let query = "ALTER DATABASE postgres SET pgaudit.log to 'none'"; + Ok(Box::new(once(Operation { + query: query.to_string(), + comment: Some(query.to_string()), + }))) + } ApplySpecPhase::HandleNeonExtension => { let operations = vec![ Operation { diff --git a/control_plane/src/endpoint.rs b/control_plane/src/endpoint.rs index 87bfbd7570..b46d616827 100644 --- a/control_plane/src/endpoint.rs +++ b/control_plane/src/endpoint.rs @@ -48,7 +48,8 @@ use anyhow::{Context, Result, anyhow, bail}; use compute_api::requests::ConfigurationRequest; use compute_api::responses::{ComputeCtlConfig, ComputeStatus, ComputeStatusResponse}; use compute_api::spec::{ - Cluster, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, RemoteExtSpec, Role, + Cluster, ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, Database, PgIdent, + RemoteExtSpec, Role, }; use nix::sys::signal::{Signal, kill}; use pageserver_api::shard::ShardStripeSize; @@ -668,6 +669,7 @@ impl Endpoint { local_proxy_config: None, reconfigure_concurrency: self.reconfigure_concurrency, drop_subscriptions_before_start: self.drop_subscriptions_before_start, + audit_log_level: ComputeAudit::Disabled, }; // this strange code is needed to support respec() in tests diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index df82d8b449..77f2e1e631 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -155,6 +155,16 @@ pub struct ComputeSpec { /// over the same replication content from publisher. #[serde(default)] // Default false pub drop_subscriptions_before_start: bool, + + /// Log level for audit logging: + /// + /// Disabled - no audit logging. This is the default. 
+ /// log - log masked statements to the postgres log using pgaudit extension + /// hipaa - log unmasked statements to the file using pgaudit and pgauditlogtofile extension + /// + /// Extensions should be present in shared_preload_libraries + #[serde(default)] + pub audit_log_level: ComputeAudit, } /// Feature flag to signal `compute_ctl` to enable certain experimental functionality. @@ -262,6 +272,17 @@ pub enum ComputeMode { Replica, } +/// Log level for audit logging +/// Disabled, log, hipaa +/// Default is Disabled +#[derive(Clone, Debug, Default, Eq, PartialEq, Deserialize, Serialize)] +pub enum ComputeAudit { + #[default] + Disabled, + Log, + Hipaa, +} + #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub struct Cluster { pub cluster_id: Option, From 8263107f6c67c86a4e1a641129bad42cb88b2557 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Wed, 5 Mar 2025 19:17:57 +0100 Subject: [PATCH 098/207] feat(compute): Add filename label to remote ext requests metric (#11091) ## Problem We realized that we may use this metric for more 'live' info about extension installations vs. what we have with installed extensions metric, which is only updated at start, atm. ## Summary of changes Add `filename` label to `compute_ctl_remote_ext_requests_total`. Note that it contains the raw archive name with `.tar.zst` at the end, so the consumer may need to strip this suffix. 
Closes https://github.com/neondatabase/cloud/issues/24694 --- compute_tools/src/extension_server.rs | 18 +++++++++++------- compute_tools/src/metrics.rs | 4 +--- .../regress/test_download_extensions.py | 2 ++ 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index 77e98359ab..b4de786b00 100644 --- a/compute_tools/src/extension_server.rs +++ b/compute_tools/src/extension_server.rs @@ -253,27 +253,31 @@ pub fn create_control_files(remote_extensions: &RemoteExtSpec, pgbin: &str) { } } -// Do request to extension storage proxy, i.e. +// Do request to extension storage proxy, e.g., // curl http://pg-ext-s3-gateway/latest/v15/extensions/anon.tar.zst -// using HHTP GET -// and return the response body as bytes -// +// using HTTP GET and return the response body as bytes. async fn download_extension_tar(ext_remote_storage: &str, ext_path: &str) -> Result { let uri = format!("{}/{}", ext_remote_storage, ext_path); + let filename = Path::new(ext_path) + .file_name() + .unwrap_or_else(|| std::ffi::OsStr::new("unknown")) + .to_str() + .unwrap_or("unknown") + .to_string(); - info!("Download extension {} from uri {}", ext_path, uri); + info!("Downloading extension file '{}' from uri {}", filename, uri); match do_extension_server_request(&uri).await { Ok(resp) => { info!("Successfully downloaded remote extension data {}", ext_path); REMOTE_EXT_REQUESTS_TOTAL - .with_label_values(&[&StatusCode::OK.to_string()]) + .with_label_values(&[&StatusCode::OK.to_string(), &filename]) .inc(); Ok(resp) } Err((msg, status)) => { REMOTE_EXT_REQUESTS_TOTAL - .with_label_values(&[&status]) + .with_label_values(&[&status, &filename]) .inc(); bail!(msg); } diff --git a/compute_tools/src/metrics.rs b/compute_tools/src/metrics.rs index bc96e5074c..dab32d5dc1 100644 --- a/compute_tools/src/metrics.rs +++ b/compute_tools/src/metrics.rs @@ -54,9 +54,7 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy = 
Lazy::new(|| register_int_counter_vec!( "compute_ctl_remote_ext_requests_total", "Total number of requests made by compute_ctl to download extensions from S3 proxy by status", - // Do not use any labels like extension name yet. - // We can add them later if needed. - &["http_status"] + &["http_status", "filename"] ) .expect("failed to define a metric") }); diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py index 7f12c14073..2ff525464d 100644 --- a/test_runner/regress/test_download_extensions.py +++ b/test_runner/regress/test_download_extensions.py @@ -137,6 +137,8 @@ def test_remote_extensions( metrics = parse_metrics(raw_metrics) remote_ext_requests = metrics.query_all( "compute_ctl_remote_ext_requests_total", + # Check that we properly report the filename in the metrics + {"filename": "anon.tar.zst"}, ) assert len(remote_ext_requests) == 1 for sample in remote_ext_requests: From d599d2df8065bbb2d9090d259baf0ed0504b4c50 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Wed, 5 Mar 2025 12:32:45 -0600 Subject: [PATCH 099/207] Update postgres_exporter to 0.17.1 (#11094) Signed-off-by: Tristan Partin --- compute/compute-node.Dockerfile | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index a7e8718ea9..61d9d59f79 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1484,7 +1484,7 @@ WORKDIR /ext-src COPY compute/patches/pg_duckdb_v031.patch . COPY compute/patches/duckdb_v120.patch . 
# pg_duckdb build requires source dir to be a git repo to get submodules -# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: +# allow neon_superuser to execute some functions that in pg_duckdb are available to superuser only: # - extension management function duckdb.install_extension() # - access to duckdb.extensions table and its sequence RUN git clone --depth 1 --branch v0.3.1 https://github.com/duckdb/pg_duckdb.git pg_duckdb-src && \ @@ -1499,8 +1499,8 @@ ARG PG_VERSION COPY --from=pg_duckdb-src /ext-src/ /ext-src/ WORKDIR /ext-src/pg_duckdb-src RUN make install -j $(getconf _NPROCESSORS_ONLN) && \ - echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control - + echo 'trusted = true' >> /usr/local/pgsql/share/extension/pg_duckdb.control + ######################################################################################### # # Layer "pg_repack" @@ -1758,15 +1758,15 @@ ARG TARGETARCH # test_runner/regress/test_compute_metrics.py # See comment on the top of the file regading `echo`, `-e` and `\n` RUN if [ "$TARGETARCH" = "amd64" ]; then\ - postgres_exporter_sha256='027e75dda7af621237ff8f5ac66b78a40b0093595f06768612b92b1374bd3105';\ + postgres_exporter_sha256='59aa4a7bb0f7d361f5e05732f5ed8c03cc08f78449cef5856eadec33a627694b';\ pgbouncer_exporter_sha256='c9f7cf8dcff44f0472057e9bf52613d93f3ffbc381ad7547a959daa63c5e84ac';\ sql_exporter_sha256='38e439732bbf6e28ca4a94d7bc3686d3fa1abdb0050773d5617a9efdb9e64d08';\ else\ - postgres_exporter_sha256='131a376d25778ff9701a4c81f703f179e0b58db5c2c496e66fa43f8179484786';\ + postgres_exporter_sha256='d1dedea97f56c6d965837bfd1fbb3e35a3b4a4556f8cccee8bd513d8ee086124';\ pgbouncer_exporter_sha256='217c4afd7e6492ae904055bc14fe603552cf9bac458c063407e991d68c519da3';\ sql_exporter_sha256='11918b00be6e2c3a67564adfdb2414fdcbb15a5db76ea17d1d1a944237a893c6';\ fi\ - && curl -sL 
https://github.com/prometheus-community/postgres_exporter/releases/download/v0.16.0/postgres_exporter-0.16.0.linux-${TARGETARCH}.tar.gz\ + && curl -sL https://github.com/prometheus-community/postgres_exporter/releases/download/v0.17.1/postgres_exporter-0.17.1.linux-${TARGETARCH}.tar.gz\ | tar xzf - --strip-components=1 -C.\ && curl -sL https://github.com/prometheus-community/pgbouncer_exporter/releases/download/v0.10.2/pgbouncer_exporter-0.10.2.linux-${TARGETARCH}.tar.gz\ | tar xzf - --strip-components=1 -C.\ From 604eb5e8d454104705eeeb3e60c68b9a12d221ef Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Wed, 5 Mar 2025 21:01:17 +0100 Subject: [PATCH 100/207] fix grafana dashboard link for pooler endoints (#11099) ## Problem Our benchmarking workflows contain links to grafana dashboards to troubleshoot problems. This works fine for non-pooled endpoints. For pooled endpoints we need to remove the `-pooler` suffix from the endpoint's hostname to get a valid endpoint ID. Example link that doesn't work in this run https://github.com/neondatabase/neon/actions/runs/13678933253/job/38246028316#step:8:311 ## Summary of changes Check if connection string is a -pooler connection string and if so remove this suffix from the endpoint ID. 
--------- Co-authored-by: Alexander Bayandin --- test_runner/fixtures/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test_runner/fixtures/utils.py b/test_runner/fixtures/utils.py index 84d62fb877..d1b2a5a400 100644 --- a/test_runner/fixtures/utils.py +++ b/test_runner/fixtures/utils.py @@ -337,6 +337,8 @@ def allure_add_grafana_link(host: str, timeline_id: TimelineId, start_ms: int, e """ # We expect host to be in format like ep-holy-mouse-w2u462gi.us-east-2.aws.neon.build endpoint_id, region_id, _ = host.split(".", 2) + # Remove "-pooler" suffix if present + endpoint_id = endpoint_id.removesuffix("-pooler") params = { "orgId": 1, From 1fe23fe8d25d01ea256e2f4a8b2294815f8b0671 Mon Sep 17 00:00:00 2001 From: Em Sharnoff Date: Wed, 5 Mar 2025 12:35:08 -0800 Subject: [PATCH 101/207] compute/lfc: Add chunk size to neon_lfc_stats (#11100) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR adds a new key to neon.neon_lfc_stats — 'file_cache_chunk_size_pages'. It just returns the value of BLOCKS_PER_CHUNK from the LFC implementation. The new value should (eventually) allow changing the chunk size without breaking any places that rely on LFC stats values measured in number of chunks. See neondatabase/cloud#25170 for more. 
--- compute/etc/neon_collector.jsonnet | 1 + compute/etc/sql_exporter/lfc_chunk_size.libsonnet | 10 ++++++++++ compute/etc/sql_exporter/lfc_chunk_size.sql | 1 + pgxn/neon/file_cache.c | 4 ++++ 4 files changed, 16 insertions(+) create mode 100644 compute/etc/sql_exporter/lfc_chunk_size.libsonnet create mode 100644 compute/etc/sql_exporter/lfc_chunk_size.sql diff --git a/compute/etc/neon_collector.jsonnet b/compute/etc/neon_collector.jsonnet index f8f4cab63b..da2b86d542 100644 --- a/compute/etc/neon_collector.jsonnet +++ b/compute/etc/neon_collector.jsonnet @@ -29,6 +29,7 @@ import 'sql_exporter/lfc_approximate_working_set_size.libsonnet', import 'sql_exporter/lfc_approximate_working_set_size_windows.libsonnet', import 'sql_exporter/lfc_cache_size_limit.libsonnet', + import 'sql_exporter/lfc_chunk_size.libsonnet', import 'sql_exporter/lfc_hits.libsonnet', import 'sql_exporter/lfc_misses.libsonnet', import 'sql_exporter/lfc_used.libsonnet', diff --git a/compute/etc/sql_exporter/lfc_chunk_size.libsonnet b/compute/etc/sql_exporter/lfc_chunk_size.libsonnet new file mode 100644 index 0000000000..bbe56f869f --- /dev/null +++ b/compute/etc/sql_exporter/lfc_chunk_size.libsonnet @@ -0,0 +1,10 @@ +{ + metric_name: 'lfc_chunk_size', + type: 'gauge', + help: 'LFC chunk size, measured in 8KiB pages', + key_labels: null, + values: [ + 'lfc_chunk_size_pages', + ], + query: importstr 'sql_exporter/lfc_chunk_size.sql', +} diff --git a/compute/etc/sql_exporter/lfc_chunk_size.sql b/compute/etc/sql_exporter/lfc_chunk_size.sql new file mode 100644 index 0000000000..0905870064 --- /dev/null +++ b/compute/etc/sql_exporter/lfc_chunk_size.sql @@ -0,0 +1 @@ +SELECT lfc_value AS lfc_chunk_size_pages FROM neon.neon_lfc_stats WHERE lfc_key = 'file_cache_chunk_size_pages'; diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index f6a577abfc..9f0a877b07 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -1369,6 +1369,10 @@ neon_get_lfc_stats(PG_FUNCTION_ARGS) if (lfc_ctl) 
value = lfc_ctl->limit; break; + case 8: + key = "file_cache_chunk_size_pages"; + value = BLOCKS_PER_CHUNK; + break; default: SRF_RETURN_DONE(funcctx); } From 2de3629b88ac8e68bdb5d236796c866e9b1bba25 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:02:44 -0500 Subject: [PATCH 102/207] test(pageserver): use reldirv2 by default in regress tests (#11081) ## Problem For pg_regress test, we do both v1 and v2; for all the rest, we default to v2. part of https://github.com/neondatabase/neon/issues/9516 ## Summary of changes Use reldir v2 across test cases by default. --------- Signed-off-by: Alex Chi Z --- pageserver/src/pgdatadir_mapping.rs | 15 ++++++---- pageserver/src/tenant.rs | 9 ++++-- .../src/tenant/remote_timeline_client.rs | 8 ++++- test_runner/fixtures/neon_fixtures.py | 3 ++ test_runner/regress/test_pg_regress.py | 29 ++++++++++++++----- 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index e663060d17..8bcc6d58ec 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -1964,14 +1964,12 @@ impl DatadirModification<'_> { .context("deserialize db")? 
}; - // Add the new relation to the rel directory entry, and write it back - if !rel_dir.rels.insert((rel.relnode, rel.forknum)) { - return Err(RelationError::AlreadyExists); - } - let v2_enabled = self.maybe_enable_rel_size_v2()?; if v2_enabled { + if rel_dir.rels.contains(&(rel.relnode, rel.forknum)) { + return Err(RelationError::AlreadyExists); + } let sparse_rel_dir_key = rel_tag_sparse_key(rel.spcnode, rel.dbnode, rel.relnode, rel.forknum); // check if the rel_dir_key exists in v2 @@ -2006,6 +2004,10 @@ impl DatadirModification<'_> { self.pending_directory_entries .push((DirectoryKind::RelV2, MetricsUpdate::Add(1))); } else { + // Add the new relation to the rel directory entry, and write it back + if !rel_dir.rels.insert((rel.relnode, rel.forknum)) { + return Err(RelationError::AlreadyExists); + } if !dbdir_exists { self.pending_directory_entries .push((DirectoryKind::Rel, MetricsUpdate::Set(0))) @@ -2019,6 +2021,7 @@ impl DatadirModification<'_> { )), ); } + // Put size let size_key = rel_size_to_key(rel); let buf = nblocks.to_le_bytes(); @@ -2141,7 +2144,7 @@ impl DatadirModification<'_> { // Remove entry from relation size cache self.tline.remove_cached_rel_size(&rel_tag); - // Delete size entry, as well as all blocks + // Delete size entry, as well as all blocks; this is currently a no-op because we haven't implemented tombstones in storage. 
self.delete(rel_key_range(rel_tag)); } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3694381078..c78d15c9b5 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -2451,6 +2451,7 @@ impl Tenant { create_guard, initdb_lsn, None, + None, ) .await } @@ -2782,6 +2783,7 @@ impl Tenant { timeline_create_guard, initdb_lsn, None, + None, ) .await } @@ -4869,6 +4871,7 @@ impl Tenant { timeline_create_guard, start_lsn + 1, Some(Arc::clone(src_timeline)), + Some(src_timeline.get_rel_size_v2_status()), ) .await?; @@ -5142,6 +5145,7 @@ impl Tenant { timeline_create_guard, pgdata_lsn, None, + None, ) .await?; @@ -5220,13 +5224,14 @@ impl Tenant { create_guard: TimelineCreateGuard, start_lsn: Lsn, ancestor: Option>, + rel_size_v2_status: Option, ) -> anyhow::Result> { let tenant_shard_id = self.tenant_shard_id; let resources = self.build_timeline_resources(new_timeline_id); resources .remote_client - .init_upload_queue_for_empty_remote(new_metadata)?; + .init_upload_queue_for_empty_remote(new_metadata, rel_size_v2_status.clone())?; let timeline_struct = self .create_timeline_struct( @@ -5238,7 +5243,7 @@ impl Tenant { CreateTimelineCause::Load, create_guard.idempotency.clone(), None, - None, + rel_size_v2_status, ) .context("Failed to create timeline data structure")?; diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index 2ca482ca43..a784a05972 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -437,9 +437,13 @@ impl RemoteTimelineClient { /// Initialize the upload queue for the case where the remote storage is empty, /// i.e., it doesn't have an `IndexPart`. + /// + /// `rel_size_v2_status` needs to be carried over during branching, and that's why + /// it's passed in here. 
pub fn init_upload_queue_for_empty_remote( &self, local_metadata: &TimelineMetadata, + rel_size_v2_status: Option, ) -> anyhow::Result<()> { // Set the maximum number of inprogress tasks to the remote storage concurrency. There's // certainly no point in starting more upload tasks than this. @@ -449,7 +453,9 @@ impl RemoteTimelineClient { .as_ref() .map_or(0, |r| r.concurrency_limit()); let mut upload_queue = self.upload_queue.lock().unwrap(); - upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?; + let initialized_queue = + upload_queue.initialize_empty_remote(local_metadata, inprogress_limit)?; + initialized_queue.dirty.rel_size_migration = rel_size_v2_status; self.update_remote_physical_size_gauge(None); info!("initialized upload queue as empty"); Ok(()) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index ef9d8cb46f..8e3277a34a 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1197,6 +1197,9 @@ class NeonEnv: config.pageserver_default_tenant_config_compaction_algorithm ) + tenant_config = ps_cfg.setdefault("tenant_config", {}) + tenant_config["rel_size_v2_enabled"] = True # Enable relsize_v2 by default in tests + if self.pageserver_remote_storage is not None: ps_cfg["remote_storage"] = remote_storage_to_toml_dict( self.pageserver_remote_storage diff --git a/test_runner/regress/test_pg_regress.py b/test_runner/regress/test_pg_regress.py index d2a78b16e4..1d9f385358 100644 --- a/test_runner/regress/test_pg_regress.py +++ b/test_runner/regress/test_pg_regress.py @@ -5,7 +5,7 @@ from __future__ import annotations from concurrent.futures import ThreadPoolExecutor from pathlib import Path -from typing import TYPE_CHECKING, cast +from typing import TYPE_CHECKING, Any, cast import pytest from fixtures.log_helper import log @@ -118,10 +118,20 @@ def post_checks(env: NeonEnv, test_output_dir: Path, db_name: str, endpoint: End 
pageserver.http_client().timeline_gc(shard, env.initial_timeline, None) +def patch_tenant_conf(tenant_conf: dict[str, Any], reldir_type: str) -> dict[str, Any]: + tenant_conf = tenant_conf.copy() + if reldir_type == "v2": + tenant_conf["rel_size_v2_enabled"] = "true" + else: + tenant_conf["rel_size_v2_enabled"] = "false" + return tenant_conf + + # Run the main PostgreSQL regression tests, in src/test/regress. # @pytest.mark.timeout(3000) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_pg_regress( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -130,6 +140,7 @@ def test_pg_regress( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "regression" @@ -142,7 +153,7 @@ def test_pg_regress( neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), initial_tenant_shard_count=shard_count, ) @@ -196,6 +207,7 @@ def test_pg_regress( # @pytest.mark.timeout(1500) # Contains many sub-tests, is slow in debug builds @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_isolation( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -204,6 +216,7 @@ def test_isolation( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "isolation_regression" @@ -211,7 +224,8 @@ def test_isolation( neon_env_builder.num_pageservers = shard_count neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), + initial_tenant_shard_count=shard_count, ) # Connect to postgres and create a database called 
"regression". @@ -267,6 +281,7 @@ def test_isolation( # Run extra Neon-specific pg_regress-based tests. The tests and their # schedule file are in the sql_regress/ directory. @pytest.mark.parametrize("shard_count", [None, 4]) +@pytest.mark.parametrize("reldir_type", ["v1", "v2"]) def test_sql_regress( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, @@ -275,6 +290,7 @@ def test_sql_regress( base_dir: Path, pg_distrib_dir: Path, shard_count: int | None, + reldir_type: str, ): DBNAME = "regression" @@ -282,7 +298,8 @@ def test_sql_regress( neon_env_builder.num_pageservers = shard_count neon_env_builder.enable_pageserver_remote_storage(s3_storage()) env = neon_env_builder.init_start( - initial_tenant_conf=TENANT_CONF, initial_tenant_shard_count=shard_count + initial_tenant_conf=patch_tenant_conf(TENANT_CONF, reldir_type), + initial_tenant_shard_count=shard_count, ) # Connect to postgres and create a database called "regression". @@ -345,9 +362,7 @@ def test_tx_abort_with_many_relations( """ env = neon_env_builder.init_start( - initial_tenant_conf={ - "rel_size_v2_enabled": "true" if reldir_type == "v2" else "false", - } + initial_tenant_conf=patch_tenant_conf({}, reldir_type), ) ep = env.endpoints.create_start( "main", From 78b322f616a711e40ae8babc4b013782fb12a99a Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 5 Mar 2025 16:43:16 -0500 Subject: [PATCH 103/207] rfc: add 041-rel-sparse-keyspace (#10412) Based on the PoC patch I've done in #10316, I'd like to put an RFC in advance to ensure everyone is on the same page, and start incrementally port the code to the main branch. 
https://github.com/neondatabase/neon/issues/9516 [Rendered](https://github.com/neondatabase/neon/blob/skyzh/rfc-041-rel-sparse-keyspace/docs/rfcs/041-rel-sparse-keyspace.md) --------- Signed-off-by: Alex Chi Z Co-authored-by: Erik Grinaker --- docs/rfcs/041-rel-sparse-keyspace.md | 201 +++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 docs/rfcs/041-rel-sparse-keyspace.md diff --git a/docs/rfcs/041-rel-sparse-keyspace.md b/docs/rfcs/041-rel-sparse-keyspace.md new file mode 100644 index 0000000000..03e68bd5c1 --- /dev/null +++ b/docs/rfcs/041-rel-sparse-keyspace.md @@ -0,0 +1,201 @@ +# Sparse Keyspace for Relation Directories + +## Summary + +This is an RFC describing a new storage strategy for storing relation directories. + +## Motivation + +Postgres maintains a directory structure for databases and relations. In Neon, we store these information +by serializing the directory data in a single key (see `pgdatadir_mapping.rs`). + +```rust +// DbDir: +// 00 00000000 00000000 00000000 00 00000000 + +// RelDir: +// 00 SPCNODE DBNODE 00000000 00 00000001 (Postgres never uses relfilenode 0) +``` + +We have a dedicated structure on the ingestion path to serialize the relation directory into this single key. + +```rust +#[derive(Debug, Serialize, Deserialize, Default)] +pub(crate) struct RelDirectory { + // Set of relations that exist. (relfilenode, forknum) + // + // TODO: Store it as a btree or radix tree or something else that spans multiple + // key-value pairs, if you have a lot of relations + pub(crate) rels: HashSet<(Oid, u8)>, +} +``` + +The current codebase has the following three access patterns for the relation directory. + +1. Check if a relation exists. +2. List all relations. +3. Create/drop a relation. + +For (1), we currently have to get the reldir key, deserialize it, and check whether the relation exists in the +hash set. For (2), we get the reldir key and the hash set. 
For (3), we need first to get +and deserialize the key, add the new relation record to the hash set, and then serialize it and write it back. + +If we have 100k relations in a database, we would have a 100k-large hash set. Then, every +relation created and dropped would have deserialized and serialized this 100k-large hash set. This makes the +relation create/drop process to be quadratic. When we check if a relation exists in the ingestion path, +we would have to deserialize this super big 100k-large key before checking if a single relation exists. + +In this RFC, we will propose a new way to store the reldir data in the sparse keyspace and propose how +to seamlessly migrate users to use the new keyspace. + +The PoC patch is implemented in [PR10316](https://github.com/neondatabase/neon/pull/10316). + +## Key Mapping + +We will use the recently introduced sparse keyspace to store actual data. Sparse keyspace was proposed in +[038-aux-file-v2.md](038-aux-file-v2.md). The original reldir has one single value of `HashSet<(Oid, u8)>` +for each of the databases (identified as `spcnode, dbnode`). We encode the `Oid` (`relnode, forknum`), +into the key. + +```plain +(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> deleted +(REL_DIR_KEY_PREFIX, spcnode, dbnode, relnode, forknum, 1) -> exists +``` + +Assume all reldir data are stored in this new keyspace; the 3 reldir operations we mentioned before can be +implemented as follows. + +1. Check if a relation exists: check if the key maps to "exists". +2. List all relations: scan the sprase keyspace over the `rel_dir_key_prefix`. Extract relnode and forknum from the key. +3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation. The delete tombstone will + be removed during image layer generation upon compaction. + +Note that "exists" and "deleted" will be encoded as a single byte as two variants of an enum. +The mapping is implemented as `rel_tag_sparse_key` in the PoC patch. 
+ +## Changes to Sparse Keyspace + +Previously, we only used sparse keyspaces for the aux files, which did not carry over when branching. The reldir +information needs to be preserved from the parent branch to the child branch. Therefore, the read path needs +to be updated accordingly to accommodate such "inherited sparse keys". This is done in +[PR#10313](https://github.com/neondatabase/neon/pull/10313). + +## Coexistence of the Old and New Keyspaces + +Migrating to the new keyspace will be done gradually: when we flip a config item to enable the new reldir keyspace, the +ingestion path will start to write to the new keyspace and the old reldir data will be kept in the old one. The read +path needs to combine the data from both keyspaces. + +Theoretically, we could do a rewrite at the startup time that scans all relation directories and copies that data into the +new keyspace. However, this could take a long time, especially if we have thousands of tenants doing the migration +process simultaneously after the pageserver restarts. Therefore, we propose the coexistence strategy so that the +migration can happen seamlessly and imposes no potential downtime for the user. + +With the coexistence assumption, the 3 reldir operations will be implemented as follows: + +1. Check if a relation exists + - Check the new keyspace if the key maps to any value. If it maps to "exists" or "deleted", directly + return it to the user. + - Otherwise, deserialize the old reldir key and get the result. +2. List all relations: scan the sparse keyspace over the `rel_dir_key_prefix` and deserialize the old reldir key. + Combine them to obtain the final result. +3. Create/drop a relation: write "exists" or "deleted" to the corresponding key of the relation into the new keyspace. + - We assume no overwrite of relations will happen (i.e., the user won't create a relation at the same Oid). This will be implemented as a runtime check. 
+ - For relation creation, we add `sparse_reldir_tableX -> exists` to the keyspace. + - For relation drop, we first check if the relation is recorded in the old keyspace. If yes, we deserialize the old reldir key, + remove the relation, and then write it back. Otherwise, we put `sparse_reldir_tableX -> deleted` to the keyspace. + - The delete tombstone will be removed during image layer generation upon compaction. + +This process ensures that the transition will not introduce any downtime and all new updates are written to the new keyspace. The total +amount of data in the storage would be `O(relations_modifications)` and we can guarantee `O(current_relations)` after compaction. +There could be some relations that exist in the old reldir key for a long time. Refer to the "Full Migration" section on how to deal +with them. Plus, for relation modifications, it will have `O(old_relations)` complexity until we do the full migration, which gives +us `O(1)` complexity after fully opt-in the sparse keyspace. + +The process also implies that a relation will only exists either in the old reldir key or in the new sparse keyspace. It is not possible +to have a table to be recorded in the old reldir key while later having a delete tombstone for it in the sparse keyspace at any LSN. + +We will introduce a config item and an index_part record to record the current status of the migration process. + +- Config item `enable_reldir_v2`: controls whether the ingestion path writes the reldir info into the new keyspace. +- `index_part.json` field `reldir_v2_status`: whether the timeline has written any key into the new reldir keyspace. + +If `enable_reldir_v2` is set to `true` and the timeline ingests the first key into the new reldir keyspace, it will update +`index_part.json` to set `reldir_v2_status` to `Status::Migrating`. 
Even if `enable_reldir_v2` gets flipped back to +`false` (i.e., when the pageserver restarts and such config isn't persisted), the read/write path will still +read/write to the new keyspace to avoid data inconsistency. This also indicates that the migration is one-way only: +once v2 is enabled, the user cannot go back to v1. + +## Next Steps + +### Full Migration + +This won't be implemented in the project's first phase but might be implemented in the future. Having both v1 and +v2 existing in the system would force us to keep the code to deserialize the old reldir key forever. To entirely deprecate this +code path, we must ensure the timeline has no old reldir data. + +We can trigger a special image layer generation process at the gc-horizon. The generated image layers will cover several keyspaces: +the old reldir key in each of the databases, and the new reldir sparse keyspace. It will remove the old reldir key while +copying them into the corresponding keys in the sparse keyspace in the resulting image. This special process happens in +the background during compaction. For example, assume this special process is triggered at LSN 0/180. The `create_image_layers` +process discovers the following keys at this LSN. + +```plain +db1/reldir_key -> (table 1, table 2, table 3) +...db1 rel keys +db2/reldir_key -> (table 4, table 5, table 6) +...db2 rel keys +sparse_reldir_db2_table7 -> exists +sparse_reldir_db1_table8 -> deleted +``` + +It will generate the following keys: + +```plain +db1/reldir_key -> () # we have to keep the key because it is part of `collect_keyspace`. 
+...db1 rel keys +db2/reldir_key -> () +...db2 rel keys + +-- start image layer for the sparse keyspace at sparse_reldir_prefix at LSN 0/180 +sparse_reldir_db1_table1 -> exists +sparse_reldir_db1_table2 -> exists +sparse_reldir_db1_table3 -> exists +sparse_reldir_db2_table4 -> exists +sparse_reldir_db2_table5 -> exists +sparse_reldir_db2_table6 -> exists +sparse_reldir_db2_table7 -> exists +-- end image layer for the sparse keyspace at sparse_reldir_prefix+1 + +# The `sparse_reldir_db1_table8` key gets dropped as part of the image layer generation code for the sparse keyspace. +# Note that the read path will stop reading if a key is not found in the image layer covering the key range so there +# are no correctness issue. +``` + +We must verify that no pending modifications to the old reldir exists in the delta/image layers above the gc-horizon before +we start this process (We can do a vectored read to get the full key history of the old reldir key and ensure there are no more images +above the gc-horizon). Otherwise, it will violate the property that "a relation will only exists either in the old reldir key or +in the new sparse keyspace". After we run this migration process, we can mark `reldir_v2_status` in the `index_part.json` to +`Status::Migrated`, and the read path won't need to read from the old reldir anymore. Once the status is set to `Migrated`, we +don't need to add the key into `collect_keyspace` and therefore all of them will be removed from all future image layers. + +The migration process can be proactively triggered across all attached/detached tenants to help us fully remove the old reldir code. + +### Consolidate Relation Size Keys + +We have relsize at the end of all relation nodes. + +```plain +// RelSize: +// 00 SPCNODE DBNODE RELNODE FORK FFFFFFFF +``` + +This means that computing logical size requires us to do several single-key gets across the keyspace, +potentially requiring downloading many layer files. 
We could consolidate them into a single +keyspace, improving logical size calculation performance. + +### Migrate DBDir Keys + +We assume the number of databases created by the users will be small, and therefore, the current way +of storing the database directory would be acceptable. In the future, we could also migrate DBDir keys into +the sparse keyspace to support large amount of databases. From f343537e4dba622114011154a121df7fb8d57afc Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Thu, 6 Mar 2025 09:18:28 +0000 Subject: [PATCH 104/207] proxy: Small adjustments to json logging (#11107) * Remove callsite identifier registration on span creation. Forgot to remove from last PR. Was part of alternative idea. * Move "spans" object to right after "fields", so event and span fields are listed together. --- proxy/src/logging.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index b2e95a109f..6f9845fd6e 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -286,17 +286,16 @@ where /// Registers a SpanFields instance as span extension. fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) { - let csid = self.callsite_id(attrs.metadata().callsite()); let span = ctx.span(id).expect("span must exist"); let fields = SpanFields::default(); fields.record_fields(attrs); + // This could deadlock when there's a panic somewhere in the tracing // event handling and a read or write guard is still held. This includes // the OTel subscriber. 
let mut exts = span.extensions_mut(); exts.insert(fields); - exts.insert(csid); } fn on_record(&self, id: &span::Id, values: &span::Record<'_>, ctx: Context<'_, S>) { @@ -565,6 +564,13 @@ impl EventFormatter { )?; } + let spans = SerializableSpans { + ctx, + callsite_ids, + extract: ExtractedSpanFields::<'_, F>::new(extract_fields), + }; + serializer.serialize_entry("spans", &spans)?; + // TODO: thread-local cache? let pid = std::process::id(); // Skip adding pid 1 to reduce noise for services running in containers. @@ -614,15 +620,9 @@ impl EventFormatter { } } - let stack = SerializableSpans { - ctx, - callsite_ids, - fields: ExtractedSpanFields::<'_, F>::new(extract_fields), - }; - serializer.serialize_entry("spans", &stack)?; - - if stack.fields.has_values() { - serializer.serialize_entry("extract", &stack.fields)?; + if spans.extract.has_values() { + // TODO: add fields from event, too? + serializer.serialize_entry("extract", &spans.extract)?; } serializer.end() @@ -911,7 +911,7 @@ where { ctx: &'a Context<'ctx, Span>, callsite_ids: &'a papaya::HashMap, - fields: ExtractedSpanFields<'a, F>, + extract: ExtractedSpanFields<'a, F>, } impl serde::ser::Serialize for SerializableSpans<'_, '_, Span, F> @@ -940,7 +940,7 @@ where serializer.serialize_value(&SerializableSpanFields { span: &span, - fields: &self.fields, + extract: &self.extract, })?; } } @@ -955,7 +955,7 @@ where Span: for<'lookup> LookupSpan<'lookup>, { span: &'a SpanRef<'span, Span>, - fields: &'a ExtractedSpanFields<'a, F>, + extract: &'a ExtractedSpanFields<'a, F>, } impl serde::ser::Serialize for SerializableSpanFields<'_, '_, Span, F> @@ -973,7 +973,7 @@ where for (name, value) in &data.fields.pin() { serializer.serialize_entry(name, value)?; // TODO: replace clone with reference, if possible. 
- self.fields.set(name, value.clone()); + self.extract.set(name, value.clone()); } } From 16b8a3f598ff3b7b22f4411fdf55c088ecf6c84e Mon Sep 17 00:00:00 2001 From: Folke Behrens Date: Thu, 6 Mar 2025 09:55:41 +0000 Subject: [PATCH 105/207] Update Jinja2 to 3.1.6 (#11109) https://github.com/neondatabase/neon/security/dependabot/89 --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index ba3b0535e4..03aa543b06 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1414,14 +1414,14 @@ files = [ [[package]] name = "jinja2" -version = "3.1.5" +version = "3.1.6" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" groups = ["main"] files = [ - {file = "jinja2-3.1.5-py3-none-any.whl", hash = "sha256:aba0f4dc9ed8013c424088f68a5c226f7d6097ed89b246d7749c2ec4175c6adb"}, - {file = "jinja2-3.1.5.tar.gz", hash = "sha256:8fefff8dc3034e27bb80d67c671eb8a9bc424c0ef4c0826edbff304cceff43bb"}, + {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, + {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, ] [package.dependencies] @@ -3820,4 +3820,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "9711c5479c867fa614ce3d352f1bbc63dba1cb2376d347f96fbeda6f512ee308" +content-hash = "010ffce959bb256880ab5a267048c182e4612b3151f9a94e3bf5d3a7807962fe" diff --git a/pyproject.toml b/pyproject.toml index c6e5073bcd..e7f5c62bd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ requests = "^2.32.3" pytest-xdist = "^3.3.1" asyncpg = "^0.30.0" aiopg = "^1.4.0" -Jinja2 = "^3.1.5" +Jinja2 = "^3.1.6" types-requests = "^2.31.0.0" types-psycopg2 = "^2.9.21.20241019" boto3 = "^1.34.11" From ab7efe9e47952292f463027a007f4386c605348e Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 
6 Mar 2025 11:23:48 +0100 Subject: [PATCH 106/207] pageserver: add amortized read amp metrics (#11093) ## Problem In a batch, `pageserver_layers_per_read_global` counts all layer visits towards every read in the batch, since this directly affects the observed latency of the read. However, this doesn't give a good picture of the amortized read amplification due to batching. ## Summary of changes Add two more global read amp metrics: * `pageserver_layers_per_read_batch_global`: number of layers visited per batch. * `pageserver_layers_per_read_amortized_global`: number of layers divided by reads in a batch. --- pageserver/src/metrics.rs | 25 +++++++++++++++++++++++++ pageserver/src/tenant/timeline.rs | 25 ++++++++++++++++++------- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index eb8a9b8e24..b5b4e5c91f 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -143,6 +143,29 @@ pub(crate) static LAYERS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +pub(crate) static LAYERS_PER_READ_BATCH_GLOBAL: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_layers_per_read_batch_global", + "Layers visited to serve a single read batch (read amplification), regardless of number of reads.", + vec![ + 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 + ], + ) + .expect("failed to define a metric") +}); + +pub(crate) static LAYERS_PER_READ_AMORTIZED_GLOBAL: Lazy = Lazy::new(|| { + register_histogram!( + "pageserver_layers_per_read_amortized_global", + "Layers visited to serve a single read (read amplification). 
Amortized across a batch: \ + all visited layers are divided by number of reads.", + vec![ + 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0, 512.0, 1024.0 + ], + ) + .expect("failed to define a metric") +}); + pub(crate) static DELTAS_PER_READ_GLOBAL: Lazy = Lazy::new(|| { // We expect this to be low because of Postgres checkpoints. Let's see if that holds. register_histogram!( @@ -4074,6 +4097,8 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) { // histograms [ &LAYERS_PER_READ_GLOBAL, + &LAYERS_PER_READ_BATCH_GLOBAL, + &LAYERS_PER_READ_AMORTIZED_GLOBAL, &DELTAS_PER_READ_GLOBAL, &WAIT_LSN_TIME, &WAL_REDO_TIME, diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 7ed7910732..f646e621d3 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -99,7 +99,8 @@ use crate::disk_usage_eviction_task::{DiskUsageEvictionInfo, EvictionCandidate, use crate::keyspace::{KeyPartitioning, KeySpace}; use crate::l0_flush::{self, L0FlushGlobalState}; use crate::metrics::{ - DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics, + DELTAS_PER_READ_GLOBAL, LAYERS_PER_READ_AMORTIZED_GLOBAL, LAYERS_PER_READ_BATCH_GLOBAL, + LAYERS_PER_READ_GLOBAL, ScanLatencyOngoingRecording, TimelineMetrics, }; use crate::page_service::TenantManagerTypes; use crate::pgdatadir_mapping::{ @@ -1330,10 +1331,6 @@ impl Timeline { // (this is a requirement, not a bug). Skip updating the metric in these cases // to avoid infinite results. if !results.is_empty() { - // Record the total number of layers visited towards each key in the batch. While some - // layers may not intersect with a given read, and the cost of layer visits are - // amortized across the batch, each visited layer contributes directly to the observed - // latency for every read in the batch, which is what we care about. 
if layers_visited >= Self::LAYERS_VISITED_WARN_THRESHOLD { static LOG_PACER: Lazy> = Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(60)))); @@ -1348,9 +1345,23 @@ impl Timeline { }); } + // Records the number of layers visited in a few different ways: + // + // * LAYERS_PER_READ: all layers count towards every read in the batch, because each + // layer directly affects its observed latency. + // + // * LAYERS_PER_READ_BATCH: all layers count towards each batch, to get the per-batch + // layer visits and access cost. + // + // * LAYERS_PER_READ_AMORTIZED: the average layer count per read, to get the amortized + // read amplification after batching. + let layers_visited = layers_visited as f64; + let avg_layers_visited = layers_visited / results.len() as f64; + LAYERS_PER_READ_BATCH_GLOBAL.observe(layers_visited); for _ in &results { - self.metrics.layers_per_read.observe(layers_visited as f64); - LAYERS_PER_READ_GLOBAL.observe(layers_visited as f64); + self.metrics.layers_per_read.observe(layers_visited); + LAYERS_PER_READ_GLOBAL.observe(layers_visited); + LAYERS_PER_READ_AMORTIZED_GLOBAL.observe(avg_layers_visited); } } From 43cea0df91f29509f69e1083cf1b68645e18c8f9 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 6 Mar 2025 11:23:25 +0000 Subject: [PATCH 107/207] pageserver: allow for unit test stress test (#11112) ## Problem I like using `cargo stress` to hammer on a test, but it doesn't work out of the box because it does parallel runs by default and tests always use the same repo dir. ## Summary of changes Add an uuid to the test repo dir when generating it. 
--- Cargo.lock | 1 + pageserver/Cargo.toml | 1 + pageserver/src/config.rs | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 7aa9c53e7e..67f0fa4b77 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4303,6 +4303,7 @@ dependencies = [ "tracing", "url", "utils", + "uuid", "wal_decoder", "walkdir", "workspace_hack", diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 7330856be4..fa16090170 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -98,6 +98,7 @@ criterion.workspace = true hex-literal.workspace = true tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util", "time", "test-util"] } indoc.workspace = true +uuid.workspace = true [[bench]] name = "bench_layer_map" diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 33ae8c4790..582019d96f 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -491,7 +491,9 @@ impl PageServerConf { #[cfg(test)] pub fn test_repo_dir(test_name: &str) -> Utf8PathBuf { let test_output_dir = std::env::var("TEST_OUTPUT").unwrap_or("../tmp_check".into()); - Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}")) + + let test_id = uuid::Uuid::new_v4(); + Utf8PathBuf::from(format!("{test_output_dir}/test_{test_name}_{test_id}")) } pub fn dummy_conf(repo_dir: Utf8PathBuf) -> Self { From 5ceb8c994d5c22737ef6c2c70349525f13cc225c Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Thu, 6 Mar 2025 11:25:02 +0000 Subject: [PATCH 108/207] pageserver: mark unarchival heatmap layers as cold (#11098) ## Problem On unarchival, we update the previous heatmap with all visible layers. When the primary generates a new heatmap it includes all those layers, so the secondary will download them. Since they're not actually resident on the primary (we didn't call the warm up API), they'll never be evicted, so they remain in the heatmap. 
We want these layers in the heatmap, since we might wish to warm-up an unarchived timeline after a shard migration. However, we don't want them to be downloaded on the secondary until we've warmed up the primary. ## Summary of Changes Include these layers in the heatmap and mark them as cold. All heatmap operations act on non-cold layers apart from the attached location warming up API, which will download the cold layers. Once the cold layers are downloaded on the primary, they'll be included in the next heatmap as hot and the secondary starts fetching them too. --- pageserver/src/tenant/secondary/downloader.rs | 14 ++--- pageserver/src/tenant/secondary/heatmap.rs | 24 ++++++-- pageserver/src/tenant/storage_layer/layer.rs | 4 +- pageserver/src/tenant/timeline.rs | 28 ++++++---- .../timeline/heatmap_layers_downloader.rs | 4 +- .../regress/test_pageserver_secondary.py | 55 ++++++++++++++++--- 6 files changed, 96 insertions(+), 33 deletions(-) diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index a13b9323ac..5f3a0932c4 100644 --- a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -869,8 +869,7 @@ impl<'a> TenantDownloader<'a> { let heatmap_timeline = heatmap.timelines.get(heatmap_timeline_index).unwrap(); let layers_in_heatmap = heatmap_timeline - .layers - .iter() + .hot_layers() .map(|l| (&l.name, l.metadata.generation)) .collect::>(); let layers_on_disk = timeline_state @@ -1015,7 +1014,8 @@ impl<'a> TenantDownloader<'a> { // Accumulate updates to the state let mut touched = Vec::new(); - for layer in timeline.layers { + let timeline_id = timeline.timeline_id; + for layer in timeline.into_hot_layers() { if self.secondary_state.cancel.is_cancelled() { tracing::debug!("Cancelled -- dropping out of layer loop"); return (Err(UpdateError::Cancelled), touched); @@ -1040,7 +1040,7 @@ impl<'a> TenantDownloader<'a> { } match self - .download_layer(tenant_shard_id, 
&timeline.timeline_id, layer, ctx) + .download_layer(tenant_shard_id, &timeline_id, layer, ctx) .await { Ok(Some(layer)) => touched.push(layer), @@ -1148,7 +1148,7 @@ impl<'a> TenantDownloader<'a> { let tenant_shard_id = self.secondary_state.get_tenant_shard_id(); let timeline_id = timeline.timeline_id; - tracing::debug!(timeline_id=%timeline_id, "Downloading layers, {} in heatmap", timeline.layers.len()); + tracing::debug!(timeline_id=%timeline_id, "Downloading layers, {} in heatmap", timeline.hot_layers().count()); let (result, touched) = self .download_timeline_layers(tenant_shard_id, timeline, timeline_state, deadline, ctx) @@ -1316,11 +1316,11 @@ async fn init_timeline_state( // As we iterate through layers found on disk, we will look up their metadata from this map. // Layers not present in metadata will be discarded. let heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> = - heatmap.layers.iter().map(|l| (&l.name, l)).collect(); + heatmap.hot_layers().map(|l| (&l.name, l)).collect(); let last_heatmap_metadata: HashMap<&LayerName, &HeatMapLayer> = if let Some(last_heatmap) = last_heatmap { - last_heatmap.layers.iter().map(|l| (&l.name, l)).collect() + last_heatmap.hot_layers().map(|l| (&l.name, l)).collect() } else { HashMap::new() }; diff --git a/pageserver/src/tenant/secondary/heatmap.rs b/pageserver/src/tenant/secondary/heatmap.rs index 4a938e9095..6dbb3f091f 100644 --- a/pageserver/src/tenant/secondary/heatmap.rs +++ b/pageserver/src/tenant/secondary/heatmap.rs @@ -42,7 +42,7 @@ pub(crate) struct HeatMapTimeline { #[serde_as(as = "DisplayFromStr")] pub(crate) timeline_id: TimelineId, - pub(crate) layers: Vec, + layers: Vec, } #[serde_as] @@ -53,8 +53,10 @@ pub(crate) struct HeatMapLayer { #[serde_as(as = "TimestampSeconds")] pub(crate) access_time: SystemTime, - // TODO: an actual 'heat' score that would let secondary locations prioritize downloading - // the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary. 
+ + #[serde(default)] + pub(crate) cold: bool, // TODO: an actual 'heat' score that would let secondary locations prioritize downloading + // the hottest layers, rather than trying to simply mirror whatever layers are on-disk on the primary. } impl HeatMapLayer { @@ -62,11 +64,13 @@ impl HeatMapLayer { name: LayerName, metadata: LayerFileMetadata, access_time: SystemTime, + cold: bool, ) -> Self { Self { name, metadata, access_time, + cold, } } } @@ -78,6 +82,18 @@ impl HeatMapTimeline { layers, } } + + pub(crate) fn into_hot_layers(self) -> impl Iterator { + self.layers.into_iter().filter(|l| !l.cold) + } + + pub(crate) fn hot_layers(&self) -> impl Iterator { + self.layers.iter().filter(|l| !l.cold) + } + + pub(crate) fn all_layers(&self) -> impl Iterator { + self.layers.iter() + } } pub(crate) struct HeatMapStats { @@ -92,7 +108,7 @@ impl HeatMapTenant { layers: 0, }; for timeline in &self.timelines { - for layer in &timeline.layers { + for layer in timeline.hot_layers() { stats.layers += 1; stats.bytes += layer.metadata.file_size; } diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index bde7fbc1f9..247092bf45 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -1563,10 +1563,10 @@ impl LayerInner { self.access_stats.record_residence_event(); - self.status.as_ref().unwrap().send_replace(Status::Evicted); - *self.last_evicted_at.lock().unwrap() = Some(std::time::Instant::now()); + self.status.as_ref().unwrap().send_replace(Status::Evicted); + Ok(()) } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index f646e621d3..4483ecfe94 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3648,7 +3648,7 @@ impl Timeline { let visible_non_resident = match previous_heatmap.as_deref() { Some(PreviousHeatmap::Active { heatmap, read_at, .. 
- }) => Some(heatmap.layers.iter().filter_map(|hl| { + }) => Some(heatmap.all_layers().filter_map(|hl| { let desc: PersistentLayerDesc = hl.name.clone().into(); let layer = guard.try_get_from_key(&desc.key())?; @@ -3664,7 +3664,7 @@ impl Timeline { return None; } - Some((desc, hl.metadata.clone(), hl.access_time)) + Some((desc, hl.metadata.clone(), hl.access_time, hl.cold)) })), Some(PreviousHeatmap::Obsolete) => None, None => None, @@ -3680,6 +3680,7 @@ impl Timeline { layer.layer_desc().clone(), layer.metadata(), last_activity_ts, + false, // these layers are not cold )) } LayerVisibilityHint::Covered => { @@ -3706,12 +3707,14 @@ impl Timeline { // Sort layers in order of which to download first. For a large set of layers to download, we // want to prioritize those layers which are most likely to still be in the resident many minutes // or hours later: + // - Cold layers go last for convenience when a human inspects the heatmap. // - Download L0s last, because they churn the fastest: L0s on a fast-writing tenant might // only exist for a few minutes before being compacted into L1s. // - For L1 & image layers, download most recent LSNs first: the older the LSN, the sooner // the layer is likely to be covered by an image layer during compaction. 
- layers.sort_by_key(|(desc, _meta, _atime)| { + layers.sort_by_key(|(desc, _meta, _atime, cold)| { std::cmp::Reverse(( + *cold, !LayerMap::is_l0(&desc.key_range, desc.is_delta), desc.lsn_range.end, )) @@ -3719,7 +3722,9 @@ impl Timeline { let layers = layers .into_iter() - .map(|(desc, meta, atime)| HeatMapLayer::new(desc.layer_name(), meta, atime)) + .map(|(desc, meta, atime, cold)| { + HeatMapLayer::new(desc.layer_name(), meta, atime, cold) + }) .collect(); Some(HeatMapTimeline::new(self.timeline_id, layers)) @@ -3739,6 +3744,7 @@ impl Timeline { name: vl.layer_desc().layer_name(), metadata: vl.metadata(), access_time: now, + cold: true, }; heatmap_layers.push(hl); } @@ -7040,6 +7046,7 @@ mod tests { use pageserver_api::key::Key; use pageserver_api::value::Value; + use std::iter::Iterator; use tracing::Instrument; use utils::id::TimelineId; use utils::lsn::Lsn; @@ -7053,8 +7060,8 @@ mod tests { use crate::tenant::{PreviousHeatmap, Timeline}; fn assert_heatmaps_have_same_layers(lhs: &HeatMapTimeline, rhs: &HeatMapTimeline) { - assert_eq!(lhs.layers.len(), rhs.layers.len()); - let lhs_rhs = lhs.layers.iter().zip(rhs.layers.iter()); + assert_eq!(lhs.all_layers().count(), rhs.all_layers().count()); + let lhs_rhs = lhs.all_layers().zip(rhs.all_layers()); for (l, r) in lhs_rhs { assert_eq!(l.name, r.name); assert_eq!(l.metadata, r.metadata); @@ -7132,10 +7139,11 @@ mod tests { assert_eq!(heatmap.timeline_id, timeline.timeline_id); // L0 should come last - assert_eq!(heatmap.layers.last().unwrap().name, l0_delta.layer_name()); + let heatmap_layers = heatmap.all_layers().collect::>(); + assert_eq!(heatmap_layers.last().unwrap().name, l0_delta.layer_name()); let mut last_lsn = Lsn::MAX; - for layer in &heatmap.layers { + for layer in heatmap_layers { // Covered layer should be omitted assert!(layer.name != covered_delta.layer_name()); @@ -7264,7 +7272,7 @@ mod tests { .expect("Infallible while timeline is not shut down"); // Both layers should be in the heatmap - 
assert!(!heatmap.layers.is_empty()); + assert!(heatmap.all_layers().count() > 0); // Now simulate a migration. timeline @@ -7290,7 +7298,7 @@ mod tests { .await .expect("Infallible while timeline is not shut down"); - assert!(post_eviction_heatmap.layers.is_empty()); + assert_eq!(post_eviction_heatmap.all_layers().count(), 0); assert!(matches!( timeline.previous_heatmap.load().as_deref(), Some(PreviousHeatmap::Obsolete) diff --git a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs index 6209b63de4..11df232a10 100644 --- a/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs +++ b/pageserver/src/tenant/timeline/heatmap_layers_downloader.rs @@ -61,11 +61,11 @@ impl HeatmapLayersDownloader { tracing::info!( resident_size=%timeline.resident_physical_size(), - heatmap_layers=%heatmap.layers.len(), + heatmap_layers=%heatmap.all_layers().count(), "Starting heatmap layers download" ); - let stream = futures::stream::iter(heatmap.layers.into_iter().filter_map( + let stream = futures::stream::iter(heatmap.all_layers().cloned().filter_map( |layer| { let ctx = ctx.attached_child(); let tl = timeline.clone(); diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index ab0f00db1c..b9e2934505 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -955,6 +955,17 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): raise RuntimeError(f"No heatmap for timeline: {tlid}") + def count_timeline_heatmap_layers(tlid) -> tuple[int, int]: + cold, hot = 0, 0 + layers = timeline_heatmap(tlid)["layers"] + for layer in layers: + if layer["cold"]: + cold += 1 + else: + hot += 1 + + return cold, hot + env.storage_controller.allowed_errors.extend( [ ".*Timed out.*downloading layers.*", @@ -988,13 +999,19 @@ def test_migration_to_cold_secondary(neon_env_builder: 
NeonEnvBuilder): TenantShardId(tenant_id, shard_number=0, shard_count=0), timeline_id ) - def all_layers_downloaded(expected_layer_count: int): - local_layers_count = len(ps_secondary.list_layers(tenant_id, timeline_id)) + def all_layers_downloaded(node, expected_layer_count: int): + local_layers_count = len(node.list_layers(tenant_id, timeline_id)) log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}") assert local_layers_count >= expected_layer_count - wait_until(lambda: all_layers_downloaded(after_migration_heatmap_layers_count)) + def no_layers_downloaded(node): + local_layers_count = len(node.list_layers(tenant_id, timeline_id)) + + log.info(f"{local_layers_count=} {after_migration_heatmap_layers_count=}") + assert local_layers_count == 0 + + wait_until(lambda: all_layers_downloaded(ps_secondary, after_migration_heatmap_layers_count)) # Read everything and make sure that we're not downloading anything extra. # All hot layers should be available locally now. @@ -1047,13 +1064,35 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): wait_until(lambda: check_archival_state(TimelineArchivalState.UNARCHIVED, child_timeline_id)) ps_secondary.http_client().tenant_heatmap_upload(tenant_id) - log.info(f"Parent timeline heatmap size: {len(timeline_heatmap(timeline_id)['layers'])}") - log.info(f"Child timeline heatmap size: {len(timeline_heatmap(child_timeline_id)['layers'])}") - expected_locally = len(timeline_heatmap(timeline_id)["layers"]) - assert expected_locally > 0 + parent_cold, parent_hot = count_timeline_heatmap_layers(timeline_id) + child_cold, child_hot = count_timeline_heatmap_layers(child_timeline_id) + + log.info(f"Parent timeline heatmap size: cold={parent_cold}, hot={parent_hot}") + log.info(f"Child timeline heatmap size: cold={child_cold}, hot={child_hot}") + + # All layers in the heatmap should come from the generation on unarchival. + # Hence, they should be cold. 
+ assert parent_cold > 0 + assert parent_hot == 0 + + expected_locally = parent_cold env.storage_controller.download_heatmap_layers( TenantShardId(tenant_id, shard_number=0, shard_count=0), child_timeline_id, recurse=True ) - wait_until(lambda: all_layers_downloaded(expected_locally)) + wait_until(lambda: all_layers_downloaded(ps_secondary, expected_locally)) + + for ps in env.pageservers: + ps.http_client().configure_failpoints([("secondary-layer-download-sleep", "off")]) + + # The uploaded heatmap is still empty. Clean up all layers on the secondary. + ps_attached.http_client().tenant_secondary_download(tenant_id, wait_ms=100) + wait_until(lambda: no_layers_downloaded(ps_attached)) + + # Upload a new heatmap. The previously cold layers become hot since they're now resident. + ps_secondary.http_client().tenant_heatmap_upload(tenant_id) + + # Warm up the current secondary. + ps_attached.http_client().tenant_secondary_download(tenant_id, wait_ms=100) + wait_until(lambda: all_layers_downloaded(ps_secondary, expected_locally)) From 4b77807de9bc2ea550aff812303f44b71e64aefd Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 6 Mar 2025 16:32:17 +0100 Subject: [PATCH 109/207] fix(compute/sql_exporter): Ignore invalid DBs when collecting size (#11097) ## Problem Original Slack discussion: https://neondb.slack.com/archives/C04DGM6SMTM/p1739915430147169 TL;DR in Postgres, it's totally normal to have 'invalid' DBs (state after the interrupted `DROP DATABASE`). Yet, some of our metrics collected with `sql_exporter` try to get the size of such invalid DBs. 
Typical log lines: ``` time=2025-03-05T16:30:32.368Z level=ERROR source=promhttp.go:52 msg="Error gathering metrics" error="[from Gatherer #1] [collector=neon_collector,query=pg_stats_userdb] pq: [NEON_SMGR] [reqid 0] could not read db size of db 173228 from page server at lsn 0/44A0E8C0" time=2025-03-05T16:30:32.369Z level=ERROR source=promhttp.go:52 msg="Error gathering metrics" error="[from Gatherer #1] [collector=neon_collector,query=db_total_size] pq: [NEON_SMGR] [reqid 0] could not read db size of db 173228 from page server at lsn 0/44A0E8C0" ``` ## Summary of changes Ignore invalid DBs in these two metrics -- `pg_stats_userdb` and `db_total_size` --- compute/etc/sql_exporter/db_total_size.sql | 6 +++++- compute/etc/sql_exporter/pg_stats_userdb.sql | 16 +++++++++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/compute/etc/sql_exporter/db_total_size.sql b/compute/etc/sql_exporter/db_total_size.sql index 9cbbdfd8a3..fe0360ab5c 100644 --- a/compute/etc/sql_exporter/db_total_size.sql +++ b/compute/etc/sql_exporter/db_total_size.sql @@ -1 +1,5 @@ -SELECT sum(pg_database_size(datname)) AS total FROM pg_database; +SELECT sum(pg_database_size(datname)) AS total +FROM pg_database +-- Ignore invalid databases, as we will likely have problems with +-- getting their size from the Pageserver. +WHERE datconnlimit != -2; diff --git a/compute/etc/sql_exporter/pg_stats_userdb.sql b/compute/etc/sql_exporter/pg_stats_userdb.sql index 00ada87370..12e6c4ae59 100644 --- a/compute/etc/sql_exporter/pg_stats_userdb.sql +++ b/compute/etc/sql_exporter/pg_stats_userdb.sql @@ -1,10 +1,20 @@ -- We export stats for 10 non-system databases. Without this limit it is too -- easy to abuse the system by creating lots of databases. 
-SELECT pg_database_size(datname) AS db_size, deadlocks, tup_inserted AS inserted, - tup_updated AS updated, tup_deleted AS deleted, datname +SELECT pg_database_size(datname) AS db_size, + deadlocks, + tup_inserted AS inserted, + tup_updated AS updated, + tup_deleted AS deleted, + datname FROM pg_stat_database WHERE datname IN ( SELECT datname FROM pg_database - WHERE datname <> 'postgres' AND NOT datistemplate ORDER BY oid LIMIT 10 + -- Ignore invalid databases, as we will likely have problems with + -- getting their size from the Pageserver. + WHERE datconnlimit != -2 + AND datname <> 'postgres' + AND NOT datistemplate + ORDER BY oid + LIMIT 10 ); From 11334a2cdb8cf7e5c1c0245bbd61b45c54ac7d69 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 6 Mar 2025 11:44:00 -0500 Subject: [PATCH 110/207] feat(pageserver): more statistics for gc-compaction (#11103) ## Problem part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes * Add timers for each phase of the gc-compaction. * Add a final ratio computation to directly show the garbage collection ratio in the logs. 
--------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 107 +++++++++++++++++-- 1 file changed, 99 insertions(+), 8 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 17f7d96e5e..8fa79ddb22 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -7,6 +7,7 @@ use std::collections::{BinaryHeap, HashMap, HashSet, VecDeque}; use std::ops::{Deref, Range}; use std::sync::Arc; +use std::time::Instant; use super::layer_manager::LayerManager; use super::{ @@ -724,17 +725,41 @@ struct CompactionStatisticsNumSize { #[derive(Debug, Serialize, Default)] pub struct CompactionStatistics { + /// Delta layer visited (maybe compressed, physical size) delta_layer_visited: CompactionStatisticsNumSize, + /// Image layer visited (maybe compressed, physical size) image_layer_visited: CompactionStatisticsNumSize, + /// Delta layer produced (maybe compressed, physical size) delta_layer_produced: CompactionStatisticsNumSize, + /// Image layer produced (maybe compressed, physical size) image_layer_produced: CompactionStatisticsNumSize, - num_delta_layer_discarded: usize, - num_image_layer_discarded: usize, + /// Delta layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer) + delta_layer_discarded: CompactionStatisticsNumSize, + /// Image layer discarded (maybe compressed, physical size of the layer being discarded instead of the original layer) + image_layer_discarded: CompactionStatisticsNumSize, num_unique_keys_visited: usize, + /// Delta visited (uncompressed, original size) wal_keys_visited: CompactionStatisticsNumSize, + /// Image visited (uncompressed, original size) image_keys_visited: CompactionStatisticsNumSize, + /// Delta produced (uncompressed, original size) wal_produced: CompactionStatisticsNumSize, + /// Image produced (uncompressed, original size) image_produced: 
CompactionStatisticsNumSize, + + // Time spent in each phase + time_acquire_lock_secs: f64, + time_analyze_secs: f64, + time_download_layer_secs: f64, + time_main_loop_secs: f64, + time_final_phase_secs: f64, + time_total_secs: f64, + + // Summary + /// Ratio of the key-value size before/after gc-compaction. + uncompressed_size_ratio: f64, + /// Ratio of the physical size before/after gc-compaction. + physical_size_ratio: f64, } impl CompactionStatistics { @@ -784,11 +809,13 @@ impl CompactionStatistics { self.image_produced.num += 1; self.image_produced.size += val.len() as u64 + Self::estimated_size_of_key() as u64; } - fn discard_delta_layer(&mut self) { - self.num_delta_layer_discarded += 1; + fn discard_delta_layer(&mut self, original_size: u64) { + self.delta_layer_discarded.num += 1; + self.delta_layer_discarded.size += original_size; } - fn discard_image_layer(&mut self) { - self.num_image_layer_discarded += 1; + fn discard_image_layer(&mut self, original_size: u64) { + self.image_layer_discarded.num += 1; + self.image_layer_discarded.size += original_size; } fn produce_delta_layer(&mut self, size: u64) { self.delta_layer_produced.num += 1; @@ -798,6 +825,19 @@ impl CompactionStatistics { self.image_layer_produced.num += 1; self.image_layer_produced.size += size; } + fn finalize(&mut self) { + let original_key_value_size = self.image_keys_visited.size + self.wal_keys_visited.size; + let produced_key_value_size = self.image_produced.size + self.wal_produced.size; + self.uncompressed_size_ratio = + original_key_value_size as f64 / (produced_key_value_size as f64 + 1.0); // avoid div by 0 + let original_physical_size = self.image_layer_visited.size + self.delta_layer_visited.size; + let produced_physical_size = self.image_layer_produced.size + + self.delta_layer_produced.size + + self.image_layer_discarded.size + + self.delta_layer_discarded.size; // Also include the discarded layers to make the ratio accurate + self.physical_size_ratio = + 
original_physical_size as f64 / (produced_physical_size as f64 + 1.0); // avoid div by 0 + } } #[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] @@ -2595,6 +2635,9 @@ impl Timeline { // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. // Note that we already acquired the compaction lock when the outer `compact` function gets called. + let timer = Instant::now(); + let begin_timer = timer; + let gc_lock = async { tokio::select! { guard = self.gc_lock.lock() => Ok(guard), @@ -2602,6 +2645,9 @@ impl Timeline { } }; + let time_acquire_lock = timer.elapsed(); + let timer = Instant::now(); + let gc_lock = crate::timed( gc_lock, "acquires gc lock", @@ -2791,6 +2837,9 @@ impl Timeline { has_data_below, ); + let time_analyze = timer.elapsed(); + let timer = Instant::now(); + for layer in &job_desc.selected_layers { debug!("read layer: {}", layer.layer_desc().key()); } @@ -2893,6 +2942,9 @@ impl Timeline { .context("failed to create filter iterator") .map_err(CompactionError::Other)?; + let time_download_layer = timer.elapsed(); + let timer = Instant::now(); + // Step 2: Produce images+deltas. 
let mut accumulated_values = Vec::new(); let mut last_key: Option = None; @@ -3114,6 +3166,9 @@ impl Timeline { .map_err(CompactionError::Other)?; // end: move the above part to the loop body + let time_main_loop = timer.elapsed(); + let timer = Instant::now(); + let mut rewrote_delta_layers = Vec::new(); for (key, writers) in delta_layer_rewriters { if let Some(delta_writer_before) = writers.before { @@ -3178,6 +3233,13 @@ impl Timeline { let mut keep_layers = HashSet::new(); let produced_delta_layers_len = produced_delta_layers.len(); let produced_image_layers_len = produced_image_layers.len(); + + let layer_selection_by_key = job_desc + .selected_layers + .iter() + .map(|l| (l.layer_desc().key(), l.layer_desc().clone())) + .collect::>(); + for action in produced_delta_layers { match action { BatchWriterResult::Produced(layer) => { @@ -3191,8 +3253,16 @@ impl Timeline { if cfg!(debug_assertions) { info!("discarded delta layer: {}", l); } + if let Some(layer_desc) = layer_selection_by_key.get(&l) { + stat.discard_delta_layer(layer_desc.file_size()); + } else { + tracing::warn!( + "discarded delta layer not in layer_selection: {}, produced a layer outside of the compaction key range?", + l + ); + stat.discard_delta_layer(0); + } keep_layers.insert(l); - stat.discard_delta_layer(); } } } @@ -3201,6 +3271,9 @@ impl Timeline { "produced rewritten delta layer: {}", layer.layer_desc().key() ); + // For now, we include rewritten delta layer size in the "produce_delta_layer". We could + // make it a separate statistics in the future. 
+ stat.produce_delta_layer(layer.layer_desc().file_size()); } compact_to.extend(rewrote_delta_layers); for action in produced_image_layers { @@ -3212,8 +3285,16 @@ impl Timeline { } BatchWriterResult::Discarded(l) => { debug!("discarded image layer: {}", l); + if let Some(layer_desc) = layer_selection_by_key.get(&l) { + stat.discard_image_layer(layer_desc.file_size()); + } else { + tracing::warn!( + "discarded image layer not in layer_selection: {}, produced a layer outside of the compaction key range?", + l + ); + stat.discard_image_layer(0); + } keep_layers.insert(l); - stat.discard_image_layer(); } } } @@ -3261,6 +3342,16 @@ impl Timeline { layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key())); + let time_final_phase = timer.elapsed(); + + stat.time_final_phase_secs = time_final_phase.as_secs_f64(); + stat.time_main_loop_secs = time_main_loop.as_secs_f64(); + stat.time_acquire_lock_secs = time_acquire_lock.as_secs_f64(); + stat.time_download_layer_secs = time_download_layer.as_secs_f64(); + stat.time_analyze_secs = time_analyze.as_secs_f64(); + stat.time_total_secs = begin_timer.elapsed().as_secs_f64(); + stat.finalize(); + info!( "gc-compaction statistics: {}", serde_json::to_string(&stat) From 3bb318a2959b38cdf6fb6b074432144926312ada Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Thu, 6 Mar 2025 18:47:54 +0100 Subject: [PATCH 111/207] run periodic page bench more frequently to simplify bi-secting regressions (#11121) ## Problem When periodic pagebench runs only once a day a lot of commits can be in between a good run and a regression. 
## Summary of changes Run the workflow every 3 hours --- .github/workflows/periodic_pagebench.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/periodic_pagebench.yml b/.github/workflows/periodic_pagebench.yml index 0622faba33..f854bf3212 100644 --- a/.github/workflows/periodic_pagebench.yml +++ b/.github/workflows/periodic_pagebench.yml @@ -3,12 +3,12 @@ name: Periodic pagebench performance test on dedicated EC2 machine in eu-central on: schedule: # * is a special character in YAML so you have to quote this string - # ┌───────────── minute (0 - 59) - # │ ┌───────────── hour (0 - 23) - # │ │ ┌───────────── day of the month (1 - 31) - # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) - # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - - cron: '0 18 * * *' # Runs at 6 PM UTC every day + # ┌───────────── minute (0 - 59) + # │ ┌───────────── hour (0 - 23) + # │ │ ┌───────────── day of the month (1 - 31) + # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) + # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) + - cron: '0 */3 * * *' # Runs every 3 hours workflow_dispatch: # Allows manual triggering of the workflow inputs: commit_hash: From 3dee29eb0069b2d31a5fb3a810d7ec4c8966f6af Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Thu, 6 Mar 2025 19:14:19 +0000 Subject: [PATCH 112/207] Spawn rsyslog from neonvm (#11111) then configure it from compute_ctl. to make it more robust in case of restarts and rsyslogd crashes. 
--- compute/compute-node.Dockerfile | 10 ++-- compute/vm-image-spec-bookworm.yaml | 16 ++++++ compute/vm-image-spec-bullseye.yaml | 15 ++++++ compute_tools/src/compute.rs | 4 +- ...nf => compute_audit_rsyslog_template.conf} | 0 compute_tools/src/rsyslog.rs | 49 +++++++++---------- 6 files changed, 60 insertions(+), 34 deletions(-) rename compute_tools/src/config_template/{compute_rsyslog_template.conf => compute_audit_rsyslog_template.conf} (100%) diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 61d9d59f79..6e46185e36 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1980,12 +1980,10 @@ COPY --from=sql_exporter_preprocessor --chmod=0644 /home/nonroot/compute/etc/neo RUN echo '/usr/local/lib' >> /etc/ld.so.conf && /sbin/ldconfig # rsyslog config permissions -RUN chown postgres:postgres /etc/rsyslog.conf && \ - touch /etc/compute_rsyslog.conf && \ - chown -R postgres:postgres /etc/compute_rsyslog.conf && \ - # directory for rsyslogd pid file - mkdir /var/run/rsyslogd && \ - chown -R postgres:postgres /var/run/rsyslogd +# directory for rsyslogd pid file +RUN mkdir /var/run/rsyslogd && \ + chown -R postgres:postgres /var/run/rsyslogd && \ + chown -R postgres:postgres /etc/rsyslog.d/ ENV LANG=en_US.utf8 diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 74ff3a8b6d..e6707381ac 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -39,6 +39,10 @@ commands: user: nobody sysvInitAction: respawn shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' + - name: rsyslogd + user: postgres + sysvInitAction: respawn + shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf' shutdownHook: | su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' files: @@ -69,6 +73,12 @@ files: } memory 
{} } +# Create dummy rsyslog config, because it refuses to start without at least one action configured. +# compute_ctl will rewrite this file with the actual configuration, if needed. + - filename: compute_rsyslog.conf + content: | + *.* /dev/null + $IncludeConfig /etc/rsyslog.d/*.conf build: | # Build cgroup-tools # @@ -132,6 +142,12 @@ merge: | RUN set -e \ && chmod 0644 /etc/cgconfig.conf + + COPY compute_rsyslog.conf /etc/compute_rsyslog.conf + RUN chmod 0666 /etc/compute_rsyslog.conf + RUN chmod 0666 /var/log/ + + COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index c1787ab018..c89ee112dc 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -39,6 +39,10 @@ commands: user: nobody sysvInitAction: respawn shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' + - name: rsyslogd + user: postgres + sysvInitAction: respawn + shell: '/usr/sbin/rsyslogd -n -i /var/run/rsyslogd/rsyslogd.pid -f /etc/compute_rsyslog.conf' shutdownHook: | su -p postgres --session-command '/usr/local/bin/pg_ctl stop -D /var/db/postgres/compute/pgdata -m fast --wait -t 10' files: @@ -69,6 +73,12 @@ files: } memory {} } +# Create dummy rsyslog config, because it refuses to start without at least one action configured. +# compute_ctl will rewrite this file with the actual configuration, if needed. 
+ - filename: compute_rsyslog.conf + content: | + *.* /dev/null + $IncludeConfig /etc/rsyslog.d/*.conf build: | # Build cgroup-tools # @@ -128,6 +138,11 @@ merge: | RUN set -e \ && chmod 0644 /etc/cgconfig.conf + COPY compute_rsyslog.conf /etc/compute_rsyslog.conf + RUN chmod 0666 /etc/compute_rsyslog.conf + RUN chmod 0666 /var/log/ + + COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ COPY --from=libcgroup-builder /libcgroup-install/lib/* /usr/lib/ COPY --from=libcgroup-builder /libcgroup-install/sbin/* /usr/sbin/ diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index ddcfe12330..fed97ee2b2 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -37,7 +37,7 @@ use crate::logger::startup_context_from_env; use crate::lsn_lease::launch_lsn_lease_bg_task_for_static; use crate::monitor::launch_monitor; use crate::pg_helpers::*; -use crate::rsyslog::configure_and_start_rsyslog; +use crate::rsyslog::configure_audit_rsyslog; use crate::spec::*; use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; @@ -689,7 +689,7 @@ impl ComputeNode { let log_directory_path = Path::new(&self.params.pgdata).join("log"); // TODO: make this more robust // now rsyslog starts once and there is no monitoring or restart if it fails - configure_and_start_rsyslog( + configure_audit_rsyslog( log_directory_path.to_str().unwrap(), "hipaa", &remote_endpoint, diff --git a/compute_tools/src/config_template/compute_rsyslog_template.conf b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf similarity index 100% rename from compute_tools/src/config_template/compute_rsyslog_template.conf rename to compute_tools/src/config_template/compute_audit_rsyslog_template.conf diff --git a/compute_tools/src/rsyslog.rs b/compute_tools/src/rsyslog.rs index 776ff14644..c8fba4fdcd 100644 --- a/compute_tools/src/rsyslog.rs +++ b/compute_tools/src/rsyslog.rs @@ -21,40 +21,34 @@ fn get_rsyslog_pid() -> Option { 
} } -// Start rsyslogd with the specified configuration file -// If it is already running, do nothing. -fn start_rsyslog(rsyslog_conf_path: &str) -> Result<()> { - let pid = get_rsyslog_pid(); - if let Some(pid) = pid { - info!("rsyslogd is already running with pid: {}", pid); - return Ok(()); - } +// Restart rsyslogd to apply the new configuration. +// This is necessary, because there is no other way to reload the rsyslog configuration. +// +// Rsyslogd shouldn't lose any messages, because of the restart, +// because it tracks the last read position in the log files +// and will continue reading from that position. +// TODO: test it properly +// +fn restart_rsyslog() -> Result<()> { + let old_pid = get_rsyslog_pid().context("rsyslogd is not running")?; + info!("rsyslogd is running with pid: {}, restart it", old_pid); - let _ = Command::new("/usr/sbin/rsyslogd") - .arg("-f") - .arg(rsyslog_conf_path) - .arg("-i") - .arg("/var/run/rsyslogd/rsyslogd.pid") + // kill it to restart + let _ = Command::new("pkill") + .arg("rsyslogd") .output() - .context("Failed to start rsyslogd")?; - - // Check that rsyslogd is running - if let Some(pid) = get_rsyslog_pid() { - info!("rsyslogd started successfully with pid: {}", pid); - } else { - return Err(anyhow::anyhow!("Failed to start rsyslogd")); - } + .context("Failed to stop rsyslogd")?; Ok(()) } -pub fn configure_and_start_rsyslog( +pub fn configure_audit_rsyslog( log_directory: &str, tag: &str, remote_endpoint: &str, ) -> Result<()> { let config_content: String = format!( - include_str!("config_template/compute_rsyslog_template.conf"), + include_str!("config_template/compute_audit_rsyslog_template.conf"), log_directory = log_directory, tag = tag, remote_endpoint = remote_endpoint @@ -62,7 +56,7 @@ pub fn configure_and_start_rsyslog( info!("rsyslog config_content: {}", config_content); - let rsyslog_conf_path = "/etc/compute_rsyslog.conf"; + let rsyslog_conf_path = "/etc/rsyslog.d/compute_audit_rsyslog.conf"; let mut file = 
OpenOptions::new() .create(true) .write(true) @@ -71,10 +65,13 @@ pub fn configure_and_start_rsyslog( file.write_all(config_content.as_bytes())?; - info!("rsyslog configuration added successfully. Starting rsyslogd"); + info!( + "rsyslog configuration file {} added successfully. Starting rsyslogd", + rsyslog_conf_path + ); // start the service, using the configuration - start_rsyslog(rsyslog_conf_path)?; + restart_rsyslog()?; Ok(()) } From a485022300005491a62e3371764f029367380be0 Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Thu, 6 Mar 2025 20:54:29 +0100 Subject: [PATCH 113/207] fix(compute_ctl): Properly escape identifiers inside PL/pgSQL blocks (#11045) ## Problem In f37eeb56, I properly escaped the identifier, but I haven't noticed that the resulting string is used in the `format('...')`, so it needs additional escaping. Yet, after looking at it closer and with Heikki's and Tristan's help, it appeared to be that it's a full can of worms and we have problems all over the code in places where we use PL/pgSQL blocks. ## Summary of changes Add a new `pg_quote_dollar()` helper to deal with it, as dollar-quoting of strings seems to be the only robust way to escape strings in dynamic PL/pgSQL blocks. We mimic the Postgres' `pg_get_functiondef` logic here [1]. While on it, I added more tests and caught a couple of more bugs with string escaping: 1. `get_existing_dbs_async()` was wrapping `owner` in additional double-quotes if it contained special characters 2. `construct_superuser_query()` was flawed in even more ways than the rest of the code. It wasn't realistic to fix it quickly, but after thinking about it more, I realized that we could drop most of it altogether. IIUC, it was added as some sort of migration, probably back when we haven't had migrations yet. So all the complicated code was needed to properly update existing roles and DBs. In the current Neon, this code only runs before we create the very first DB and role. 
When we create roles and DBs, all `neon_superuser` grants are added in the different places. So the worst thing that could happen is that there is an ancient branch somewhere, so when users poke it, they will realize that not all Neon features work as expected. Yet, the fix is simple and self-serve -- just create a new role via UI or API, and it will get a proper `neon_superuser` grant. [1]: https://github.com/postgres/postgres/blob/8b49392b270b4ac0b9f5c210e2a503546841e832/src/backend/utils/adt/ruleutils.c#L3153 Closes neondatabase/cloud#25048 --- compute_tools/src/compute.rs | 73 --------- compute_tools/src/pg_helpers.rs | 34 +++- compute_tools/src/spec_apply.rs | 49 +++--- .../src/sql/create_neon_superuser.sql | 8 + compute_tools/src/sql/drop_subscriptions.sql | 4 +- .../sql/pre_drop_role_revoke_privileges.sql | 10 +- .../src/sql/set_public_schema_owner.sql | 7 +- .../src/sql/unset_template_for_drop_dbs.sql | 8 +- compute_tools/tests/pg_helpers_tests.rs | 17 ++ test_runner/regress/test_compute_catalog.py | 150 +++++++++++++----- 10 files changed, 210 insertions(+), 150 deletions(-) create mode 100644 compute_tools/src/sql/create_neon_superuser.sql diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index fed97ee2b2..354528e2cd 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -297,79 +297,6 @@ struct StartVmMonitorResult { vm_monitor: Option>>, } -pub(crate) fn construct_superuser_query(spec: &ComputeSpec) -> String { - let roles = spec - .cluster - .roles - .iter() - .map(|r| escape_literal(&r.name)) - .collect::>(); - - let dbs = spec - .cluster - .databases - .iter() - .map(|db| escape_literal(&db.name)) - .collect::>(); - - let roles_decl = if roles.is_empty() { - String::from("roles text[] := NULL;") - } else { - format!( - r#" - roles text[] := ARRAY(SELECT rolname - FROM pg_catalog.pg_roles - WHERE rolname IN ({}));"#, - roles.join(", ") - ) - }; - - let database_decl = if dbs.is_empty() { - 
String::from("dbs text[] := NULL;") - } else { - format!( - r#" - dbs text[] := ARRAY(SELECT datname - FROM pg_catalog.pg_database - WHERE datname IN ({}));"#, - dbs.join(", ") - ) - }; - - // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on all databases - // (see https://www.postgresql.org/docs/current/ddl-priv.html) - let query = format!( - r#" - DO $$ - DECLARE - r text; - {} - {} - BEGIN - IF NOT EXISTS ( - SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser') - THEN - CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data; - IF array_length(roles, 1) IS NOT NULL THEN - EXECUTE format('GRANT neon_superuser TO %s', - array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(roles) as x), ', ')); - FOREACH r IN ARRAY roles LOOP - EXECUTE format('ALTER ROLE %s CREATEROLE CREATEDB', quote_ident(r)); - END LOOP; - END IF; - IF array_length(dbs, 1) IS NOT NULL THEN - EXECUTE format('GRANT ALL PRIVILEGES ON DATABASE %s TO neon_superuser', - array_to_string(ARRAY(SELECT quote_ident(x) FROM unnest(dbs) as x), ', ')); - END IF; - END IF; - END - $$;"#, - roles_decl, database_decl, - ); - - query -} - impl ComputeNode { pub fn new( params: ComputeNodeParams, diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 5a2e305e1d..dd8d8e9b8b 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -186,15 +186,40 @@ impl DatabaseExt for Database { /// Postgres SQL queries and DATABASE_URL. pub trait Escaping { fn pg_quote(&self) -> String; + fn pg_quote_dollar(&self) -> (String, String); } impl Escaping for PgIdent { /// This is intended to mimic Postgres quote_ident(), but for simplicity it /// always quotes provided string with `""` and escapes every `"`. /// **Not idempotent**, i.e. if string is already escaped it will be escaped again. + /// N.B. 
it's not useful for escaping identifiers that are used inside WHERE + /// clause, use `escape_literal()` instead. fn pg_quote(&self) -> String { - let result = format!("\"{}\"", self.replace('"', "\"\"")); - result + format!("\"{}\"", self.replace('"', "\"\"")) + } + + /// This helper is intended to be used for dollar-escaping strings for usage + /// inside PL/pgSQL procedures. In addition to dollar-escaping the string, + /// it also returns a tag that is intended to be used inside the outer + /// PL/pgSQL procedure. If you do not need an outer tag, just discard it. + /// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`, + /// + fn pg_quote_dollar(&self) -> (String, String) { + let mut tag: String = "".to_string(); + let mut outer_tag = "x".to_string(); + + // Find the first suitable tag that is not present in the string. + // Postgres' max role/DB name length is 63 bytes, so even in the + // worst case it won't take long. + while self.contains(&format!("${tag}$")) || self.contains(&format!("${outer_tag}$")) { + tag += "x"; + outer_tag = tag.clone() + "x"; + } + + let escaped = format!("${tag}${self}${tag}$"); + + (escaped, outer_tag) } } @@ -226,10 +251,13 @@ pub async fn get_existing_dbs_async( // invalid state. See: // https://github.com/postgres/postgres/commit/a4b4cc1d60f7e8ccfcc8ff8cb80c28ee411ad9a9 let rowstream = client + // We use a subquery instead of a fancy `datdba::regrole::text AS owner`, + // because the latter automatically wraps the result in double quotes, + // if the role name contains special characters. 
.query_raw::( "SELECT datname AS name, - datdba::regrole::text AS owner, + (SELECT rolname FROM pg_roles WHERE oid = datdba) AS owner, NOT datallowconn AS restrict_conn, datconnlimit = - 2 AS invalid FROM diff --git a/compute_tools/src/spec_apply.rs b/compute_tools/src/spec_apply.rs index dbc02c8d02..e5f7aebbf8 100644 --- a/compute_tools/src/spec_apply.rs +++ b/compute_tools/src/spec_apply.rs @@ -13,16 +13,17 @@ use tokio_postgres::Client; use tokio_postgres::error::SqlState; use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; -use crate::compute::{ComputeNode, ComputeState, construct_superuser_query}; +use crate::compute::{ComputeNode, ComputeState}; use crate::pg_helpers::{ - DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, escape_literal, get_existing_dbs_async, + DatabaseExt, Escaping, GenericOptionsSearch, RoleExt, get_existing_dbs_async, get_existing_roles_async, }; use crate::spec_apply::ApplySpecPhase::{ - CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreatePgauditExtension, - CreatePgauditlogtofileExtension, CreateSchemaNeon, CreateSuperUser, DisablePostgresDBPgAudit, - DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, HandleNeonExtension, - HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, RunInEachDatabase, + CreateAndAlterDatabases, CreateAndAlterRoles, CreateAvailabilityCheck, CreateNeonSuperuser, + CreatePgauditExtension, CreatePgauditlogtofileExtension, CreateSchemaNeon, + DisablePostgresDBPgAudit, DropInvalidDatabases, DropRoles, FinalizeDropLogicalSubscriptions, + HandleNeonExtension, HandleOtherExtensions, RenameAndDeleteDatabases, RenameRoles, + RunInEachDatabase, }; use crate::spec_apply::PerDatabasePhase::{ ChangeSchemaPerms, DeleteDBRoleReferences, DropLogicalSubscriptions, HandleAnonExtension, @@ -187,7 +188,7 @@ impl ComputeNode { } for phase in [ - CreateSuperUser, + CreateNeonSuperuser, DropInvalidDatabases, RenameRoles, CreateAndAlterRoles, @@ -468,7 +469,7 
@@ pub enum PerDatabasePhase { #[derive(Clone, Debug)] pub enum ApplySpecPhase { - CreateSuperUser, + CreateNeonSuperuser, DropInvalidDatabases, RenameRoles, CreateAndAlterRoles, @@ -595,14 +596,10 @@ async fn get_operations<'a>( apply_spec_phase: &'a ApplySpecPhase, ) -> Result + 'a + Send>> { match apply_spec_phase { - ApplySpecPhase::CreateSuperUser => { - let query = construct_superuser_query(spec); - - Ok(Box::new(once(Operation { - query, - comment: None, - }))) - } + ApplySpecPhase::CreateNeonSuperuser => Ok(Box::new(once(Operation { + query: include_str!("sql/create_neon_superuser.sql").to_string(), + comment: None, + }))), ApplySpecPhase::DropInvalidDatabases => { let mut ctx = ctx.write().await; let databases = &mut ctx.dbs; @@ -736,14 +733,15 @@ async fn get_operations<'a>( // We do not check whether the DB exists or not, // Postgres will take care of it for us "delete_db" => { + let (db_name, outer_tag) = op.name.pg_quote_dollar(); // In Postgres we can't drop a database if it is a template. // So we need to unset the template flag first, but it could // be a retry, so we could've already dropped the database. // Check that database exists first to make it idempotent. let unset_template_query: String = format!( include_str!("sql/unset_template_for_drop_dbs.sql"), - datname_str = escape_literal(&op.name), - datname = &op.name.pg_quote() + datname = db_name, + outer_tag = outer_tag, ); // Use FORCE to drop database even if there are active connections. 
@@ -850,6 +848,8 @@ async fn get_operations<'a>( comment: None, }, Operation { + // ALL PRIVILEGES grants CREATE, CONNECT, and TEMPORARY on the database + // (see https://www.postgresql.org/docs/current/ddl-priv.html) query: format!( "GRANT ALL PRIVILEGES ON DATABASE {} TO neon_superuser", db.name.pg_quote() @@ -909,9 +909,11 @@ async fn get_operations<'a>( PerDatabasePhase::DropLogicalSubscriptions => { match &db { DB::UserDB(db) => { + let (db_name, outer_tag) = db.name.pg_quote_dollar(); let drop_subscription_query: String = format!( include_str!("sql/drop_subscriptions.sql"), - datname_str = escape_literal(&db.name), + datname_str = db_name, + outer_tag = outer_tag, ); let operations = vec![Operation { @@ -950,6 +952,7 @@ async fn get_operations<'a>( DB::SystemDB => PgIdent::from("cloud_admin").pg_quote(), DB::UserDB(db) => db.owner.pg_quote(), }; + let (escaped_role, outer_tag) = op.name.pg_quote_dollar(); Some(vec![ // This will reassign all dependent objects to the db owner @@ -964,7 +967,9 @@ async fn get_operations<'a>( Operation { query: format!( include_str!("sql/pre_drop_role_revoke_privileges.sql"), - role_name = quoted, + // N.B. 
this has to be properly dollar-escaped with `pg_quote_dollar()` + role_name = escaped_role, + outer_tag = outer_tag, ), comment: None, }, @@ -989,12 +994,14 @@ async fn get_operations<'a>( DB::SystemDB => return Ok(Box::new(empty())), DB::UserDB(db) => db, }; + let (db_owner, outer_tag) = db.owner.pg_quote_dollar(); let operations = vec![ Operation { query: format!( include_str!("sql/set_public_schema_owner.sql"), - db_owner = db.owner.pg_quote() + db_owner = db_owner, + outer_tag = outer_tag, ), comment: None, }, diff --git a/compute_tools/src/sql/create_neon_superuser.sql b/compute_tools/src/sql/create_neon_superuser.sql new file mode 100644 index 0000000000..300645627b --- /dev/null +++ b/compute_tools/src/sql/create_neon_superuser.sql @@ -0,0 +1,8 @@ +DO $$ + BEGIN + IF NOT EXISTS (SELECT FROM pg_catalog.pg_roles WHERE rolname = 'neon_superuser') + THEN + CREATE ROLE neon_superuser CREATEDB CREATEROLE NOLOGIN REPLICATION BYPASSRLS IN ROLE pg_read_all_data, pg_write_all_data; + END IF; + END +$$; diff --git a/compute_tools/src/sql/drop_subscriptions.sql b/compute_tools/src/sql/drop_subscriptions.sql index 03e8e158fa..f5d9420130 100644 --- a/compute_tools/src/sql/drop_subscriptions.sql +++ b/compute_tools/src/sql/drop_subscriptions.sql @@ -1,4 +1,4 @@ -DO $$ +DO ${outer_tag}$ DECLARE subname TEXT; BEGIN @@ -9,4 +9,4 @@ BEGIN EXECUTE format('DROP SUBSCRIPTION %I;', subname); END LOOP; END; -$$; +${outer_tag}$; diff --git a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql index cdaa7071d3..4342650591 100644 --- a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql +++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql @@ -1,6 +1,6 @@ SET SESSION ROLE neon_superuser; -DO $$ +DO ${outer_tag}$ DECLARE schema TEXT; revoke_query TEXT; @@ -16,13 +16,15 @@ BEGIN WHERE schema_name IN ('public') LOOP revoke_query := format( - 'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM {role_name} 
GRANTED BY neon_superuser;', - schema + 'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY neon_superuser;', + schema, + -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()` + {role_name} ); EXECUTE revoke_query; END LOOP; END; -$$; +${outer_tag}$; RESET ROLE; diff --git a/compute_tools/src/sql/set_public_schema_owner.sql b/compute_tools/src/sql/set_public_schema_owner.sql index fd061a713e..dc502c6d2d 100644 --- a/compute_tools/src/sql/set_public_schema_owner.sql +++ b/compute_tools/src/sql/set_public_schema_owner.sql @@ -1,5 +1,4 @@ -DO -$$ +DO ${outer_tag}$ DECLARE schema_owner TEXT; BEGIN @@ -16,8 +15,8 @@ $$ IF schema_owner = 'cloud_admin' OR schema_owner = 'zenith_admin' THEN - ALTER SCHEMA public OWNER TO {db_owner}; + EXECUTE format('ALTER SCHEMA public OWNER TO %I', {db_owner}); END IF; END IF; END -$$; \ No newline at end of file +${outer_tag}$; \ No newline at end of file diff --git a/compute_tools/src/sql/unset_template_for_drop_dbs.sql b/compute_tools/src/sql/unset_template_for_drop_dbs.sql index 6c4343a589..36dc648beb 100644 --- a/compute_tools/src/sql/unset_template_for_drop_dbs.sql +++ b/compute_tools/src/sql/unset_template_for_drop_dbs.sql @@ -1,12 +1,12 @@ -DO $$ +DO ${outer_tag}$ BEGIN IF EXISTS( SELECT 1 FROM pg_catalog.pg_database - WHERE datname = {datname_str} + WHERE datname = {datname} ) THEN - ALTER DATABASE {datname} is_template false; + EXECUTE format('ALTER DATABASE %I is_template false', {datname}); END IF; END -$$; \ No newline at end of file +${outer_tag}$; diff --git a/compute_tools/tests/pg_helpers_tests.rs b/compute_tools/tests/pg_helpers_tests.rs index 4961bc293d..f2d74ff384 100644 --- a/compute_tools/tests/pg_helpers_tests.rs +++ b/compute_tools/tests/pg_helpers_tests.rs @@ -61,6 +61,23 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor assert_eq!(ident.pg_quote(), "\"\"\"name\"\";\\n select 1;\""); } + #[test] + fn ident_pg_quote_dollar() { + let test_cases = 
vec![ + ("name", ("$$name$$", "x")), + ("name$$", ("$x$name$$$x$", "xx")), + ("name$$$", ("$x$name$$$$x$", "xx")), + ("name$$$$", ("$x$name$$$$$x$", "xx")), + ("name$x$", ("$xx$name$x$$xx$", "xxx")), + ]; + + for (input, expected) in test_cases { + let (escaped, tag) = PgIdent::from(input).pg_quote_dollar(); + assert_eq!(escaped, expected.0); + assert_eq!(tag, expected.1); + } + } + #[test] fn generic_options_search() { let generic_options: GenericOptions = Some(vec![ diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index 3a08671bbf..ce655d22b5 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -5,34 +5,59 @@ import logging import requests from fixtures.neon_fixtures import NeonEnv, logical_replication_sync +TEST_ROLE_NAMES = [ + {"name": "neondb_owner"}, + {"name": "role with spaces"}, + {"name": "role with%20spaces "}, + {"name": "role with whitespaces "}, + {"name": "injective role with spaces'; SELECT pg_sleep(1000);"}, + {"name": "role with #pound-sign and &ersands=true"}, + {"name": "role with emoji 🌍"}, + {"name": "role \";with ';injections $$ $x$ $ %I !/\\&#@"}, + {"name": '"role in double quotes"'}, + {"name": "'role in single quotes'"}, +] + TEST_DB_NAMES = [ { "name": "neondb", - "owner": "cloud_admin", + "owner": "neondb_owner", }, { "name": "db with spaces", - "owner": "cloud_admin", + "owner": "role with spaces", }, { "name": "db with%20spaces ", - "owner": "cloud_admin", + "owner": "role with%20spaces ", }, { "name": "db with whitespaces ", - "owner": "cloud_admin", + "owner": "role with whitespaces ", }, { - "name": "injective db with spaces'; SELECT pg_sleep(10);", - "owner": "cloud_admin", + "name": "injective db with spaces'; SELECT pg_sleep(1000);", + "owner": "injective role with spaces'; SELECT pg_sleep(1000);", }, { "name": "db with #pound-sign and &ersands=true", - "owner": "cloud_admin", + "owner": "role with #pound-sign and 
&ersands=true", }, { "name": "db with emoji 🌍", - "owner": "cloud_admin", + "owner": "role with emoji 🌍", + }, + { + "name": "db \";with ';injections $$ $x$ $ %I !/\\&#@", + "owner": "role \";with ';injections $$ $x$ $ %I !/\\&#@", + }, + { + "name": '"db in double quotes"', + "owner": '"role in double quotes"', + }, + { + "name": "'db in single quotes'", + "owner": "'role in single quotes'", }, ] @@ -52,6 +77,7 @@ def test_compute_catalog(neon_simple_env: NeonEnv): **{ "skip_pg_catalog_updates": False, "cluster": { + "roles": TEST_ROLE_NAMES, "databases": TEST_DB_NAMES, }, } @@ -99,10 +125,10 @@ def test_compute_catalog(neon_simple_env: NeonEnv): ), f"Expected 404 status code, but got {e.response.status_code}" -def test_compute_create_databases(neon_simple_env: NeonEnv): +def test_compute_create_drop_dbs_and_roles(neon_simple_env: NeonEnv): """ - Test that compute_ctl can create and work with databases with special - characters (whitespaces, %, tabs, etc.) in the name. + Test that compute_ctl can create and work with databases and roles + with special characters (whitespaces, %, tabs, etc.) in the name. 
""" env = neon_simple_env @@ -116,6 +142,7 @@ def test_compute_create_databases(neon_simple_env: NeonEnv): **{ "skip_pg_catalog_updates": False, "cluster": { + "roles": TEST_ROLE_NAMES, "databases": TEST_DB_NAMES, }, } @@ -139,6 +166,43 @@ def test_compute_create_databases(neon_simple_env: NeonEnv): assert len(curr_db) == 1 assert curr_db[0] == db["name"] + for role in TEST_ROLE_NAMES: + with endpoint.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = %s", (role["name"],)) + catalog_role = cursor.fetchone() + assert catalog_role is not None + assert catalog_role[0] == role["name"] + + delta_operations = [] + for db in TEST_DB_NAMES: + delta_operations.append({"action": "delete_db", "name": db["name"]}) + for role in TEST_ROLE_NAMES: + delta_operations.append({"action": "delete_role", "name": role["name"]}) + + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "roles": [], + "databases": [], + }, + "delta_operations": delta_operations, + } + ) + endpoint.reconfigure() + + for db in TEST_DB_NAMES: + with endpoint.cursor() as cursor: + cursor.execute("SELECT datname FROM pg_database WHERE datname = %s", (db["name"],)) + catalog_db = cursor.fetchone() + assert catalog_db is None + + for role in TEST_ROLE_NAMES: + with endpoint.cursor() as cursor: + cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = %s", (role["name"],)) + catalog_role = cursor.fetchone() + assert catalog_role is None + def test_dropdb_with_subscription(neon_simple_env: NeonEnv): """ @@ -150,17 +214,19 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): # stuff into the spec.json file. 
endpoint = env.endpoints.create_start("main") + SUB_DB_NAME = "';subscriber_db $$ $x$ $;" + PUB_DB_NAME = "publisher_db" TEST_DB_NAMES = [ { "name": "neondb", "owner": "cloud_admin", }, { - "name": "subscriber_db", + "name": SUB_DB_NAME, "owner": "cloud_admin", }, { - "name": "publisher_db", + "name": PUB_DB_NAME, "owner": "cloud_admin", }, ] @@ -177,47 +243,47 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): ) endpoint.reconfigure() - # connect to the publisher_db and create a publication - with endpoint.cursor(dbname="publisher_db") as cursor: + # Connect to the PUB_DB_NAME and create a publication + with endpoint.cursor(dbname=PUB_DB_NAME) as cursor: cursor.execute("CREATE PUBLICATION mypub FOR ALL TABLES") cursor.execute("select pg_catalog.pg_create_logical_replication_slot('mysub', 'pgoutput');") cursor.execute("CREATE TABLE t(a int)") cursor.execute("INSERT INTO t VALUES (1)") cursor.execute("CHECKPOINT") - # connect to the subscriber_db and create a subscription - # Note that we need to create subscription with - connstr = endpoint.connstr(dbname="publisher_db").replace("'", "''") - with endpoint.cursor(dbname="subscriber_db") as cursor: + # Connect to the SUB_DB_NAME and create a subscription + # Note that we need to create subscription with the following connstr: + connstr = endpoint.connstr(dbname=PUB_DB_NAME).replace("'", "''") + with endpoint.cursor(dbname=SUB_DB_NAME) as cursor: cursor.execute("CREATE TABLE t(a int)") cursor.execute( - f"CREATE SUBSCRIPTION mysub CONNECTION '{connstr}' PUBLICATION mypub WITH (create_slot = false) " + f"CREATE SUBSCRIPTION mysub CONNECTION '{connstr}' PUBLICATION mypub WITH (create_slot = false) " ) - # wait for the subscription to be active + # Wait for the subscription to be active logical_replication_sync( endpoint, endpoint, "mysub", - sub_dbname="subscriber_db", - pub_dbname="publisher_db", + sub_dbname=SUB_DB_NAME, + pub_dbname=PUB_DB_NAME, ) # Check that replication is working - with 
endpoint.cursor(dbname="subscriber_db") as cursor: + with endpoint.cursor(dbname=SUB_DB_NAME) as cursor: cursor.execute("SELECT * FROM t") rows = cursor.fetchall() assert len(rows) == 1 assert rows[0][0] == 1 - # drop the subscriber_db from the list + # Drop the SUB_DB_NAME from the list TEST_DB_NAMES_NEW = [ { "name": "neondb", "owner": "cloud_admin", }, { - "name": "publisher_db", + "name": PUB_DB_NAME, "owner": "cloud_admin", }, ] @@ -230,7 +296,7 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): "databases": TEST_DB_NAMES_NEW, }, "delta_operations": [ - {"action": "delete_db", "name": "subscriber_db"}, + {"action": "delete_db", "name": SUB_DB_NAME}, # also test the case when we try to delete a non-existent database # shouldn't happen in normal operation, # but can occur when failed operations are retried @@ -239,22 +305,22 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): } ) - logging.info("Reconfiguring the endpoint to drop the subscriber_db") + logging.info(f"Reconfiguring the endpoint to drop the {SUB_DB_NAME} database") endpoint.reconfigure() - # Check that the subscriber_db is dropped + # Check that the SUB_DB_NAME is dropped with endpoint.cursor() as cursor: - cursor.execute("SELECT datname FROM pg_database WHERE datname = %s", ("subscriber_db",)) + cursor.execute("SELECT datname FROM pg_database WHERE datname = %s", (SUB_DB_NAME,)) catalog_db = cursor.fetchone() assert catalog_db is None - # Check that we can still connect to the publisher_db - with endpoint.cursor(dbname="publisher_db") as cursor: + # Check that we can still connect to the PUB_DB_NAME + with endpoint.cursor(dbname=PUB_DB_NAME) as cursor: cursor.execute("SELECT * FROM current_database()") curr_db = cursor.fetchone() assert curr_db is not None assert len(curr_db) == 1 - assert curr_db[0] == "publisher_db" + assert curr_db[0] == PUB_DB_NAME def test_compute_drop_role(neon_simple_env: NeonEnv): @@ -265,6 +331,7 @@ def test_compute_drop_role(neon_simple_env: 
NeonEnv): """ env = neon_simple_env TEST_DB_NAME = "db_with_permissions" + TEST_GRANTEE = "'); MALFORMED SQL $$ $x$ $/;5%$ %I" endpoint = env.endpoints.create_start("main") @@ -301,16 +368,18 @@ def test_compute_drop_role(neon_simple_env: NeonEnv): cursor.execute("create view test_view as select * from test_table") with endpoint.cursor(dbname=TEST_DB_NAME, user="neon") as cursor: - cursor.execute("create role readonly") + cursor.execute(f'create role "{TEST_GRANTEE}"') # We (`compute_ctl`) make 'neon' the owner of schema 'public' in the owned database. # Postgres has all sorts of permissions and grants that we may not handle well, # but this is the shortest repro grant for the issue # https://github.com/neondatabase/cloud/issues/13582 - cursor.execute("grant select on all tables in schema public to readonly") + cursor.execute(f'grant select on all tables in schema public to "{TEST_GRANTEE}"') # Check that role was created with endpoint.cursor() as cursor: - cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'") + cursor.execute( + "SELECT rolname FROM pg_roles WHERE rolname = %(role)s", {"role": TEST_GRANTEE} + ) role = cursor.fetchone() assert role is not None @@ -318,7 +387,8 @@ def test_compute_drop_role(neon_simple_env: NeonEnv): # that may block our ability to drop the role. 
with endpoint.cursor(dbname=TEST_DB_NAME) as cursor: cursor.execute( - "select grantor from information_schema.role_table_grants where grantee = 'readonly'" + "select grantor from information_schema.role_table_grants where grantee = %(grantee)s", + {"grantee": TEST_GRANTEE}, ) res = cursor.fetchall() assert len(res) == 2, f"Expected 2 table grants, got {len(res)}" @@ -332,7 +402,7 @@ def test_compute_drop_role(neon_simple_env: NeonEnv): "delta_operations": [ { "action": "delete_role", - "name": "readonly", + "name": TEST_GRANTEE, }, ], } @@ -341,7 +411,9 @@ def test_compute_drop_role(neon_simple_env: NeonEnv): # Check that role is dropped with endpoint.cursor() as cursor: - cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly'") + cursor.execute( + "SELECT rolname FROM pg_roles WHERE rolname = %(role)s", {"role": TEST_GRANTEE} + ) role = cursor.fetchone() assert role is None From 50d883d516b620c7f91ce20adf777c4c724f0b37 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Thu, 6 Mar 2025 20:59:17 +0100 Subject: [PATCH 114/207] Add performance-correctness to the CODEOWNERS (#11124) ## Problem After splitting teams it became a bit more complicated for the PerfCorr team to work on tests changes. ## Summary of changes 1. Add PerfCorr team co-owners for `.github/` folder 2. Add PerCorr team as owner for `test_runner/` folder --- CODEOWNERS | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CODEOWNERS b/CODEOWNERS index 71b5e65f94..ab6d2257a4 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -1,8 +1,9 @@ # Autoscaling /libs/vm_monitor/ @neondatabase/autoscaling -# DevProd -/.github/ @neondatabase/developer-productivity +# DevProd & PerfCorr +/.github/ @neondatabase/developer-productivity @neondatabase/performance-correctness +/test_runner/ @neondatabase/performance-correctness # Compute /pgxn/ @neondatabase/compute From e825974a2d2791611c5278e36e67560f61100ebd Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." 
<4198311+skyzh@users.noreply.github.com> Date: Thu, 6 Mar 2025 15:30:11 -0500 Subject: [PATCH 115/207] feat(pageserver): yield gc-compaction to L0 compaction (#11120) ## Problem Part of https://github.com/neondatabase/neon/issues/9114 ## Summary of changes gc-compaction could take a long time in some cases, for example, if the job split heuristics is wrong and we selected a too large region for compaction that can't be finished within a reasonable amount of time. We will give up such tasks and yield to L0 compaction. Each gc-compaction sub-compaction job is atomic and cannot be split further so we have to give up (instead of storing a state and continue later as in image creation). --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 62 ++++++++++++++++---- 1 file changed, 50 insertions(+), 12 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 8fa79ddb22..42b36f7252 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -20,6 +20,7 @@ use anyhow::{Context, anyhow}; use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; +use futures::FutureExt; use itertools::Itertools; use once_cell::sync::Lazy; use pageserver_api::config::tenant_conf_defaults::DEFAULT_CHECKPOINT_DISTANCE; @@ -443,6 +444,7 @@ impl GcCompactionQueue { )); }; let has_pending_tasks; + let mut yield_for_l0 = false; let Some((id, item)) = ({ let mut guard = self.inner.lock().unwrap(); if let Some((id, item)) = guard.queued.pop_front() { @@ -492,13 +494,23 @@ impl GcCompactionQueue { let mut guard = self.inner.lock().unwrap(); guard.guards.entry(id).or_default().gc_guard = Some(gc_guard); } - let _ = timeline.compact_with_options(cancel, options, ctx).await?; + let compaction_result = + timeline.compact_with_options(cancel, options, ctx).await?; self.notify_and_unblock(id); + if compaction_result == CompactionOutcome::YieldForL0 { + 
yield_for_l0 = true; + } } } GcCompactionQueueItem::SubCompactionJob(options) => { // TODO: error handling, clear the queue if any task fails? - let _ = timeline.compact_with_options(cancel, options, ctx).await?; + let compaction_result = timeline.compact_with_options(cancel, options, ctx).await?; + if compaction_result == CompactionOutcome::YieldForL0 { + // We will permenantly give up a task if we yield for L0 compaction: the preempted subcompaction job won't be running + // again. This ensures that we don't keep doing duplicated work within gc-compaction. Not directly returning here because + // we need to clean things up before returning from the function. + yield_for_l0 = true; + } } GcCompactionQueueItem::Notify(id, l2_lsn) => { self.notify_and_unblock(id); @@ -527,7 +539,10 @@ impl GcCompactionQueue { let mut guard = self.inner.lock().unwrap(); guard.running = None; } - Ok(if has_pending_tasks { + Ok(if yield_for_l0 { + tracing::info!("give up gc-compaction: yield for L0 compaction"); + CompactionOutcome::YieldForL0 + } else if has_pending_tasks { CompactionOutcome::Pending } else { CompactionOutcome::Done @@ -2598,7 +2613,7 @@ impl Timeline { cancel: &CancellationToken, options: CompactOptions, ctx: &RequestContext, - ) -> Result<(), CompactionError> { + ) -> Result { let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); if sub_compaction { @@ -2620,7 +2635,7 @@ impl Timeline { if jobs_len == 0 { info!("no jobs to run, skipping gc bottom-most compaction"); } - return Ok(()); + return Ok(CompactionOutcome::Done); } self.compact_with_gc_inner(cancel, job, ctx).await } @@ -2630,7 +2645,7 @@ impl Timeline { cancel: &CancellationToken, job: GcCompactJob, ctx: &RequestContext, - ) -> Result<(), CompactionError> { + ) -> Result { // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. 
// Note that we already acquired the compaction lock when the outer `compact` function gets called. @@ -2699,7 +2714,7 @@ impl Timeline { tracing::warn!( "no layers to compact with gc: gc_cutoff not generated yet, skipping gc bottom-most compaction" ); - return Ok(()); + return Ok(CompactionOutcome::Skipped); } real_gc_cutoff } else { @@ -2737,7 +2752,7 @@ impl Timeline { "no layers to compact with gc: no historic layers below gc_cutoff, gc_cutoff={}", gc_cutoff ); - return Ok(()); + return Ok(CompactionOutcome::Done); }; // Next, if the user specifies compact_lsn_range.start, we need to filter some layers out. All the layers (strictly) below // the min_layer_lsn computed as below will be filtered out and the data will be accessed using the normal read path, as if @@ -2758,7 +2773,7 @@ impl Timeline { "no layers to compact with gc: no historic layers above compact_above_lsn, compact_above_lsn={}", compact_lsn_range.end ); - return Ok(()); + return Ok(CompactionOutcome::Done); }; // Then, pick all the layers that are below the max_layer_lsn. This is to ensure we can pick all single-key // layers to compact. @@ -2784,7 +2799,7 @@ impl Timeline { "no layers to compact with gc: no layers within the key range, gc_cutoff={}, key_range={}..{}", gc_cutoff, compact_key_range.start, compact_key_range.end ); - return Ok(()); + return Ok(CompactionOutcome::Done); } retain_lsns_below_horizon.sort(); GcCompactionJobDescription { @@ -2899,6 +2914,15 @@ impl Timeline { if cancel.is_cancelled() { return Err(CompactionError::ShuttingDown); } + let should_yield = self + .l0_compaction_trigger + .notified() + .now_or_never() + .is_some(); + if should_yield { + tracing::info!("preempt gc-compaction when downloading layers: too many L0 layers"); + return Ok(CompactionOutcome::YieldForL0); + } let resident_layer = layer .download_and_keep_resident(ctx) .await @@ -3019,6 +3043,8 @@ impl Timeline { // the key and LSN range are determined. 
However, to keep things simple here, we still // create this writer, and discard the writer in the end. + let mut keys_processed = 0; + while let Some(((key, lsn, val), desc)) = merge_iter .next_with_trace() .await @@ -3028,6 +3054,18 @@ impl Timeline { if cancel.is_cancelled() { return Err(CompactionError::ShuttingDown); } + keys_processed += 1; + if keys_processed % 1000 == 0 { + let should_yield = self + .l0_compaction_trigger + .notified() + .now_or_never() + .is_some(); + if should_yield { + tracing::info!("preempt gc-compaction in the main loop: too many L0 layers"); + return Ok(CompactionOutcome::YieldForL0); + } + } if self.shard_identity.is_key_disposable(&key) { // If this shard does not need to store this key, simply skip it. // @@ -3360,7 +3398,7 @@ impl Timeline { ); if dry_run { - return Ok(()); + return Ok(CompactionOutcome::Done); } info!( @@ -3479,7 +3517,7 @@ impl Timeline { drop(gc_lock); - Ok(()) + Ok(CompactionOutcome::Done) } } From cea67fc0624481d073fcb30fe97ff17aea8d556b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 7 Mar 2025 13:17:04 +0100 Subject: [PATCH 116/207] update ring to 0.17.13 (#11131) Update ring from 0.17.6 to 0.17.13. 
Addresses the advisory: https://rustsec.org/advisories/RUSTSEC-2025-0009 --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 67f0fa4b77..d3b09fa360 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1127,9 +1127,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.1.30" +version = "1.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16803a61b81d9eabb7eae2588776c4c1e584b738ede45fdbb4c972cec1e9945" +checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" dependencies = [ "jobserver", "libc", @@ -5627,16 +5627,16 @@ dependencies = [ [[package]] name = "ring" -version = "0.17.6" +version = "0.17.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "684d5e6e18f669ccebf64a92236bb7db9a34f07be010e3627368182027180866" +checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee" dependencies = [ "cc", + "cfg-if", "getrandom 0.2.11", "libc", - "spin", "untrusted", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] From f5aa8c3eac0cebbfd6d0394d3f361526b06a8ded Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Fri, 7 Mar 2025 13:35:42 +0100 Subject: [PATCH 117/207] feat(compute_ctl): Add a basic HTTP API benchmark (#11123) ## Problem We just had a regression reported at https://neondb.slack.com/archives/C08EXUJF554/p1741102467515599, which clearly came with one of the releases. It's not a huge problem yet, but it's annoying that we cannot quickly attribute it to a specific commit. ## Summary of changes Add a very simple `compute_ctl` HTTP API benchmark that does 10k requests to `/status` and `metrics.json` and reports p50 and p99. 
--------- Co-authored-by: Peter Bendel --- test_runner/fixtures/endpoint/http.py | 12 ++++ .../performance/test_compute_ctl_api.py | 64 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 test_runner/performance/test_compute_ctl_api.py diff --git a/test_runner/fixtures/endpoint/http.py b/test_runner/fixtures/endpoint/http.py index cdc162fca2..9b28246f58 100644 --- a/test_runner/fixtures/endpoint/http.py +++ b/test_runner/fixtures/endpoint/http.py @@ -53,6 +53,18 @@ class EndpointHttpClient(requests.Session): res.raise_for_status() return res.text + # Current compute status. + def status(self): + res = self.get(f"http://localhost:{self.external_port}/status") + res.raise_for_status() + return res.json() + + # Compute startup-related metrics. + def metrics_json(self): + res = self.get(f"http://localhost:{self.external_port}/metrics.json") + res.raise_for_status() + return res.json() + def configure_failpoints(self, *args: tuple[str, str]) -> None: body: list[dict[str, str]] = [] diff --git a/test_runner/performance/test_compute_ctl_api.py b/test_runner/performance/test_compute_ctl_api.py new file mode 100644 index 0000000000..87eb1f2c35 --- /dev/null +++ b/test_runner/performance/test_compute_ctl_api.py @@ -0,0 +1,64 @@ +from __future__ import annotations + +import datetime + +import pytest +from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker +from fixtures.neon_fixtures import NeonEnv + + +@pytest.mark.timeout(120) +def test_compute_ctl_api_latencies( + neon_simple_env: NeonEnv, + zenbenchmark: NeonBenchmarker, +): + """ + Test compute_ctl HTTP API performance. Do simple GET requests + to catch any pathological degradations in the HTTP server. 
+ """ + env = neon_simple_env + + endpoint = env.endpoints.create_start("main") + client = endpoint.http_client() + + NUM_REQUESTS = 10000 + + status_response_latency_us = [] + metrics_response_latency_us = [] + + for _i in range(NUM_REQUESTS): + start_time = datetime.datetime.now() + _ = client.status() + status_response_latency_us.append((datetime.datetime.now() - start_time).microseconds) + + start_time = datetime.datetime.now() + _ = client.metrics_json() + metrics_response_latency_us.append((datetime.datetime.now() - start_time).microseconds) + + status_response_latency_us = sorted(status_response_latency_us) + metrics_response_latency_us = sorted(metrics_response_latency_us) + + zenbenchmark.record( + "status_response_latency_p50_us", + status_response_latency_us[len(status_response_latency_us) // 2], + "microseconds", + MetricReport.LOWER_IS_BETTER, + ) + zenbenchmark.record( + "metrics_response_latency_p50_us", + metrics_response_latency_us[len(metrics_response_latency_us) // 2], + "microseconds", + MetricReport.LOWER_IS_BETTER, + ) + zenbenchmark.record( + "status_response_latency_p99_us", + status_response_latency_us[len(status_response_latency_us) * 99 // 100], + "microseconds", + MetricReport.LOWER_IS_BETTER, + ) + zenbenchmark.record( + "metrics_response_latency_p99_us", + metrics_response_latency_us[len(metrics_response_latency_us) * 99 // 100], + "microseconds", + MetricReport.LOWER_IS_BETTER, + ) From db77896e925d029cefe5b5b802881ef9a7db45f9 Mon Sep 17 00:00:00 2001 From: Fedor Dikarev Date: Fri, 7 Mar 2025 13:38:27 +0100 Subject: [PATCH 118/207] remove CODEOWNER assignement for the test_runner/ (#11130) ## Problem That adds an extra unnecessary load to the PerfCorr team ## Summary of changes Remove `CODEOWNERS` assignment for the `test_runner/` folder --- CODEOWNERS | 1 - 1 file changed, 1 deletion(-) diff --git a/CODEOWNERS b/CODEOWNERS index ab6d2257a4..2a112d9728 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -3,7 +3,6 @@ # DevProd & PerfCorr 
/.github/ @neondatabase/developer-productivity @neondatabase/performance-correctness -/test_runner/ @neondatabase/performance-correctness # Compute /pgxn/ @neondatabase/compute From f1b18874c36fdf66da824e54d64c52ac6a42ba51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 7 Mar 2025 14:29:48 +0100 Subject: [PATCH 119/207] storcon: require safekeeper jwt's in strict mode (#11116) We have introduced the ability to specify safekeeper JWTs for the storage controller. It now does heartbeats. We now want to also require the presence of those JWTs. Let's merge this PR shortly after the release cutoff. Part of / follow-up of https://github.com/neondatabase/cloud/issues/24727 --- storage_controller/src/main.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 967fb2996f..13aab930a7 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -286,18 +286,13 @@ async fn async_main() -> anyhow::Result<()> { let secrets = Secrets::load(&args).await?; - // TODO: once we've rolled out the safekeeper JWT token everywhere, put it into the validation code below - tracing::info!( - "safekeeper_jwt_token set: {:?}", - secrets.safekeeper_jwt_token.is_some() - ); - // Validate required secrets and arguments are provided in strict mode match strict_mode { StrictMode::Strict if (secrets.public_key.is_none() || secrets.pageserver_jwt_token.is_none() - || secrets.control_plane_jwt_token.is_none()) => + || secrets.control_plane_jwt_token.is_none() + || secrets.safekeeper_jwt_token.is_none()) => { // Production systems should always have secrets configured: if public_key was not set // then we would implicitly disable auth. 
From eedd179f0c9f8bf5eb32dd6f3d4245b4595bbec3 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 7 Mar 2025 15:38:01 +0100 Subject: [PATCH 120/207] storcon: initial autosplit tweaks (#11134) ## Problem This patch makes some initial tweaks as preparation for https://github.com/neondatabase/cloud/issues/22532, where we will be introducing additional autosplit logic. The plan is outlined in https://github.com/neondatabase/cloud/issues/22532#issuecomment-2706215907. ## Summary of changes Minor code cleanups and behavioral changes: * Decide that we'll split based on `max_logical_size` (later possibly `total_logical_size`). * Fix a bug that would split the smallest candidate rather than the largest. * Pick the largest candidate by `max_logical_size` rather than `resident_size`, for consistency (debatable). * Split out `get_top_tenant_shards()` to fetch split candidates. * Fetch candidates concurrently from all nodes. * Make `TenantShard.get_scheduling_policy()` return a copy instead of a reference. 
--- storage_controller/src/service.rs | 184 ++++++++++++++----------- storage_controller/src/tenant_shard.rs | 4 +- 2 files changed, 102 insertions(+), 86 deletions(-) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index e12bd299ce..caa2040ce2 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -36,9 +36,9 @@ use pageserver_api::models::{ self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization, SecondaryProgress, ShardParameters, TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, TenantLocationConfigResponse, - TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, + TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, TenantSorting, TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, - TopTenantShardsRequest, + TopTenantShardItem, TopTenantShardsRequest, }; use pageserver_api::shard::{ ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, @@ -53,7 +53,7 @@ use safekeeper_api::models::SafekeeperUtilization; use tokio::sync::TryAcquireError; use tokio::sync::mpsc::error::TrySendError; use tokio_util::sync::CancellationToken; -use tracing::{Instrument, instrument}; +use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; use utils::completion::Barrier; use utils::generation::Generation; use utils::id::{NodeId, TenantId, TimelineId}; @@ -370,8 +370,12 @@ pub struct Config { /// tenant-scoped API endpoints. Further API requests queue until ready. pub tenant_rate_limit: NonZeroU32, - /// How large must a shard grow in bytes before we split it? - /// None disables auto-splitting. + /// The size at which an unsharded tenant should be split (into 8 shards). This uses the logical + /// size of the largest timeline in the shard (i.e. max_logical_size). + /// + /// None or 0 disables auto-splitting. 
+ /// + /// TODO: consider using total logical size of all timelines instead. pub split_threshold: Option, // TODO: make this cfg(feature = "testing") @@ -4364,7 +4368,7 @@ impl Service { is_reconciling: shard.reconciler.is_some(), is_pending_compute_notification: shard.pending_compute_notification, is_splitting: matches!(shard.splitting, SplitState::Splitting), - scheduling_policy: *shard.get_scheduling_policy(), + scheduling_policy: shard.get_scheduling_policy(), preferred_az_id: shard.preferred_az().map(ToString::to_string), }) } @@ -7232,86 +7236,57 @@ impl Service { } } - /// Look for shards which are oversized and in need of splitting + /// Asynchronously split a tenant that's eligible for automatic splits: + /// + /// * The tenant is unsharded. + /// * The logical size of its largest timeline exceeds split_threshold. + /// * The tenant's scheduling policy is active. + /// + /// At most one tenant will be split per call: the one with the largest max logical size. It + /// will split 1 → 8 shards. + /// + /// TODO: consider splitting based on total logical size rather than max logical size. + /// + /// TODO: consider spawning multiple splits in parallel: this is only called once every 20 + /// seconds, so a large backlog can take a long time, and if a tenant fails to split it will + /// block all other splits. 
async fn autosplit_tenants(self: &Arc) { let Some(split_threshold) = self.config.split_threshold else { - // Auto-splitting is disabled + return; // auto-splits are disabled + }; + if split_threshold == 0 { return; - }; - - let nodes = self.inner.read().unwrap().nodes.clone(); - - const SPLIT_TO_MAX: ShardCount = ShardCount::new(8); - - let mut top_n = Vec::new(); - - // Call into each node to look for big tenants - let top_n_request = TopTenantShardsRequest { - // We currently split based on logical size, for simplicity: logical size is a signal of - // the user's intent to run a large database, whereas physical/resident size can be symptoms - // of compaction issues. Eventually we should switch to using resident size to bound the - // disk space impact of one shard. - order_by: models::TenantSorting::MaxLogicalSize, - limit: 10, - where_shards_lt: Some(SPLIT_TO_MAX), - where_gt: Some(split_threshold), - }; - for node in nodes.values() { - let request_ref = &top_n_request; - match node - .with_client_retries( - |client| async move { - let request = request_ref.clone(); - client.top_tenant_shards(request.clone()).await - }, - &self.config.pageserver_jwt_token, - 3, - 3, - Duration::from_secs(5), - &self.cancel, - ) - .await - { - Some(Ok(node_top_n)) => { - top_n.extend(node_top_n.shards.into_iter()); - } - Some(Err(mgmt_api::Error::Cancelled)) => { - continue; - } - Some(Err(e)) => { - tracing::warn!("Failed to fetch top N tenants from {node}: {e}"); - continue; - } - None => { - // Node is shutting down - continue; - } - }; } - // Pick the biggest tenant to split first - top_n.sort_by_key(|i| i.resident_size); + // Fetch the largest eligible shards by logical size. 
+ const MAX_SHARDS: ShardCount = ShardCount::new(8); - // Filter out tenants in a prohibiting scheduling mode + let mut top_n = self + .get_top_tenant_shards(&TopTenantShardsRequest { + order_by: TenantSorting::MaxLogicalSize, + limit: 10, + where_shards_lt: Some(MAX_SHARDS), + where_gt: Some(split_threshold), + }) + .await; + + // Filter out tenants in a prohibiting scheduling mode. { - let locked = self.inner.read().unwrap(); + let state = self.inner.read().unwrap(); top_n.retain(|i| { - if let Some(shard) = locked.tenants.get(&i.id) { - matches!(shard.get_scheduling_policy(), ShardSchedulingPolicy::Active) - } else { - false - } + let policy = state.tenants.get(&i.id).map(|s| s.get_scheduling_policy()); + policy == Some(ShardSchedulingPolicy::Active) }); } let Some(split_candidate) = top_n.into_iter().next() else { - tracing::debug!("No split-elegible shards found"); + debug!("No split-elegible shards found"); return; }; - // We spawn a task to run this, so it's exactly like some external API client requesting it. We don't - // want to block the background reconcile loop on this. - tracing::info!( + // We spawn a task to run this, so it's exactly like some external API client requesting it. + // We don't want to block the background reconcile loop on this. + info!( "Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}" ); @@ -7322,29 +7297,70 @@ impl Service { .tenant_shard_split( split_candidate.id.tenant_id, TenantShardSplitRequest { - // Always split to the max number of shards: this avoids stepping through - // intervening shard counts and encountering the overrhead of a split+cleanup - // each time as a tenant grows, and is not too expensive because our max shard - // count is relatively low anyway. - // This policy will be adjusted in future once we support higher shard count. 
- new_shard_count: SPLIT_TO_MAX.literal(), + // Always split to the max number of shards: this avoids stepping + // through intervening shard counts and encountering the overhead of a + // split+cleanup each time as a tenant grows, and is not too expensive + // because our max shard count is relatively low anyway. This policy + // will be adjusted in future once we support higher shard count. + new_shard_count: MAX_SHARDS.literal(), new_stripe_size: Some(ShardParameters::DEFAULT_STRIPE_SIZE), }, ) .await { - Ok(_) => { - tracing::info!("Successful auto-split"); - } - Err(e) => { - tracing::error!("Auto-split failed: {e}"); - } + Ok(_) => info!("Successful auto-split"), + Err(err) => error!("Auto-split failed: {err}"), } } - .instrument(tracing::info_span!("auto_split", tenant_id=%split_candidate.id.tenant_id)), + .instrument(info_span!("auto_split", tenant_id=%split_candidate.id.tenant_id)), ); } + /// Fetches the top tenant shards from every node, in descending order of + /// max logical size. Any node errors will be logged and ignored. 
+ async fn get_top_tenant_shards( + &self, + request: &TopTenantShardsRequest, + ) -> Vec { + let nodes = self + .inner + .read() + .unwrap() + .nodes + .values() + .cloned() + .collect_vec(); + + let mut futures = FuturesUnordered::new(); + for node in nodes { + futures.push(async move { + node.with_client_retries( + |client| async move { client.top_tenant_shards(request.clone()).await }, + &self.config.pageserver_jwt_token, + 3, + 3, + Duration::from_secs(5), + &self.cancel, + ) + .await + }); + } + + let mut top = Vec::new(); + while let Some(output) = futures.next().await { + match output { + Some(Ok(response)) => top.extend(response.shards), + Some(Err(mgmt_api::Error::Cancelled)) => {} + Some(Err(err)) => warn!("failed to fetch top tenants: {err}"), + None => {} // node is shutting down + } + } + + top.sort_by_key(|i| i.max_logical_size); + top.reverse(); + top + } + /// Useful for tests: run whatever work a background [`Self::reconcile_all`] would have done, but /// also wait for any generated Reconcilers to complete. Calling this until it returns zero should /// put the system into a quiescent state where future background reconciliations won't do anything. 
diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 34fd244023..27e478043e 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -1710,8 +1710,8 @@ impl TenantShard { self.scheduling_policy = p; } - pub(crate) fn get_scheduling_policy(&self) -> &ShardSchedulingPolicy { - &self.scheduling_policy + pub(crate) fn get_scheduling_policy(&self) -> ShardSchedulingPolicy { + self.scheduling_policy } pub(crate) fn set_last_error(&mut self, sequence: Sequence, error: ReconcileError) { From a4ce20db5ca0d85d36b5343fe71fd1111a7ff9f1 Mon Sep 17 00:00:00 2001 From: Alexander Lakhin Date: Fri, 7 Mar 2025 17:00:06 +0200 Subject: [PATCH 121/207] Support workflow_dispatch event in _meta.yml (#11133) ## Problem Allow for using _meta.yml with workflow_dispatch event. ## Summary of changes Handle this event in the run-kind step; fix and update the description of the run-kind output. --- .github/workflows/_meta.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index 9454533fbb..a3fc125648 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -19,7 +19,7 @@ on: description: "Tag of the last compute release" value: ${{ jobs.tags.outputs.compute }} run-kind: - description: "The kind of run we're currently in. Will be one of `pr`, `push-main`, `storage-rc`, `storage-release`, `proxy-rc`, `proxy-release`, `compute-rc`, `compute-release` or `merge_queue`" + description: "The kind of run we're currently in. 
Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`, `proxy-rc-pr`, `pr`, or `workflow-dispatch`" value: ${{ jobs.tags.outputs.run-kind }} permissions: {} @@ -55,6 +55,7 @@ jobs: || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-compute') && 'compute-rc-pr' || (inputs.github-event-name == 'pull_request' && github.base_ref == 'release-proxy') && 'proxy-rc-pr' || (inputs.github-event-name == 'pull_request') && 'pr' + || (inputs.github-event-name == 'workflow_dispatch') && 'workflow-dispatch' || 'unknown' }} run: | @@ -85,6 +86,9 @@ jobs: BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT ;; + workflow-dispatch) + echo "tag=$GITHUB_RUN_ID" | tee -a $GITHUB_OUTPUT + ;; *) echo "Unexpected RUN_KIND ('${RUN_KIND}'), failing to assign build-tag!" exit 1 From 937876cbe2544fbbd4a436ad6d207909808cce5d Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 7 Mar 2025 15:52:01 +0000 Subject: [PATCH 122/207] safekeeper: don't skip empty records for shard zero (#11137) ## Problem Shard zero needs to track the start LSN of the latest record in addition to the LSN of the next record to ingest. The former is included in basebackup persisted by the compute in WAL. Previously, empty records were skipped for all shards. This caused the prev LSN tracking on the PS to fall behind and led to logical replication issues. ## Summary of changes Shard zero now receives empty interpreted records for LSN tracking purposes. A test is included too.
--- safekeeper/src/send_interpreted_wal.rs | 119 ++++++++++++++++++++++--- safekeeper/src/test_utils.rs | 3 +- safekeeper/src/wal_reader_stream.rs | 2 +- 3 files changed, 112 insertions(+), 12 deletions(-) diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index bf03f27d48..c71f23a010 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -430,7 +430,10 @@ impl InterpretedWalReader { .with_context(|| "Failed to interpret WAL")?; for (shard, record) in interpreted { - if record.is_empty() { + // Shard zero needs to track the start LSN of the latest record + // in adition to the LSN of the next record to ingest. The former + // is included in basebackup persisted by the compute in WAL. + if !shard.is_shard_zero() && record.is_empty() { continue; } @@ -740,7 +743,7 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) + let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, c"neon-file:", None) .await .unwrap(); let end_pos = end_watch.get(); @@ -883,10 +886,16 @@ mod tests { let resident_tli = tli.wal_residence_guard().await.unwrap(); let mut next_record_lsns = Vec::default(); - let end_watch = - Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut next_record_lsns)) - .await - .unwrap(); + let end_watch = Env::write_wal( + tli, + start_lsn, + SIZE, + MSG_COUNT, + c"neon-file:", + Some(&mut next_record_lsns), + ) + .await + .unwrap(); let end_pos = end_watch.get(); let streaming_wal_reader = StreamingWalReader::new( @@ -1027,10 +1036,16 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = - Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, Some(&mut next_record_lsns)) - .await - .unwrap(); + let end_watch = Env::write_wal( + tli, + start_lsn, + SIZE, + MSG_COUNT, + c"neon-file:", + Some(&mut next_record_lsns), + ) + 
.await + .unwrap(); assert!(next_record_lsns.len() > 3); let shard_0_start_lsn = next_record_lsns[3]; @@ -1124,4 +1139,88 @@ mod tests { } } } + + #[tokio::test] + async fn test_shard_zero_does_not_skip_empty_records() { + let _ = env_logger::builder().is_test(true).try_init(); + + const SIZE: usize = 8 * 1024; + const MSG_COUNT: usize = 10; + const PG_VERSION: u32 = 17; + + let start_lsn = Lsn::from_str("0/149FD18").unwrap(); + let env = Env::new(true).unwrap(); + let tli = env + .make_timeline(NodeId(1), TenantTimelineId::generate(), start_lsn) + .await + .unwrap(); + + let resident_tli = tli.wal_residence_guard().await.unwrap(); + let mut next_record_lsns = Vec::new(); + let end_watch = Env::write_wal( + tli, + start_lsn, + SIZE, + MSG_COUNT, + // This is a logical message prefix that is not persisted to key value storage. + // We use it in order to validate that shard zero receives emtpy interpreted records. + c"test:", + Some(&mut next_record_lsns), + ) + .await + .unwrap(); + let end_pos = end_watch.get(); + + let streaming_wal_reader = StreamingWalReader::new( + resident_tli, + None, + start_lsn, + end_pos, + end_watch, + MAX_SEND_SIZE, + ); + + let shard = ShardIdentity::unsharded(); + let (records_tx, mut records_rx) = tokio::sync::mpsc::channel::(MSG_COUNT * 2); + + let handle = InterpretedWalReader::spawn( + streaming_wal_reader, + start_lsn, + records_tx, + shard, + PG_VERSION, + &Some("pageserver".to_string()), + ); + + let mut interpreted_records = Vec::new(); + while let Some(batch) = records_rx.recv().await { + interpreted_records.push(batch.records); + if batch.wal_end_lsn == batch.available_wal_end_lsn { + break; + } + } + + let received_next_record_lsns = interpreted_records + .into_iter() + .flat_map(|b| b.records) + .map(|rec| rec.next_record_lsn) + .collect::>(); + + // By default this also includes the start LSN. Trim it since it shouldn't be received. 
+ let next_record_lsns = next_record_lsns.into_iter().skip(1).collect::>(); + + assert_eq!(received_next_record_lsns, next_record_lsns); + + handle.abort(); + let mut done = false; + for _ in 0..5 { + if handle.current_position().is_none() { + done = true; + break; + } + tokio::time::sleep(Duration::from_millis(1)).await; + } + + assert!(done); + } } diff --git a/safekeeper/src/test_utils.rs b/safekeeper/src/test_utils.rs index e6f74185c1..618e2b59d2 100644 --- a/safekeeper/src/test_utils.rs +++ b/safekeeper/src/test_utils.rs @@ -1,3 +1,4 @@ +use std::ffi::CStr; use std::sync::Arc; use camino_tempfile::Utf8TempDir; @@ -124,6 +125,7 @@ impl Env { start_lsn: Lsn, msg_size: usize, msg_count: usize, + prefix: &CStr, mut next_record_lsns: Option<&mut Vec>, ) -> anyhow::Result { let (msg_tx, msg_rx) = tokio::sync::mpsc::channel(receive_wal::MSG_QUEUE_SIZE); @@ -133,7 +135,6 @@ impl Env { WalAcceptor::spawn(tli.wal_residence_guard().await?, msg_rx, reply_tx, Some(0)); - let prefix = c"neon-file:"; let prefixlen = prefix.to_bytes_with_nul().len(); assert!(msg_size >= prefixlen); let message = vec![0; msg_size - prefixlen]; diff --git a/safekeeper/src/wal_reader_stream.rs b/safekeeper/src/wal_reader_stream.rs index cc9d4e6e3b..aab82fedb5 100644 --- a/safekeeper/src/wal_reader_stream.rs +++ b/safekeeper/src/wal_reader_stream.rs @@ -246,7 +246,7 @@ mod tests { .unwrap(); let resident_tli = tli.wal_residence_guard().await.unwrap(); - let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, None) + let end_watch = Env::write_wal(tli, start_lsn, SIZE, MSG_COUNT, c"neon-file:", None) .await .unwrap(); let end_pos = end_watch.get(); From 084fc4a757e2191873de797f9c0dba22d2ea49ac Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Fri, 7 Mar 2025 16:05:31 +0000 Subject: [PATCH 123/207] pageserver: enable previous heatmaps by default (#11132) We add the off by default configs in https://github.com/neondatabase/neon/pull/11088 because the unarchival heatmap was causing oversized 
secondary locations. That was fixed in https://github.com/neondatabase/neon/pull/11098, so let's turn them on by default. --- pageserver/src/config.rs | 4 ++-- test_runner/fixtures/neon_fixtures.py | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 582019d96f..06be873160 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -456,8 +456,8 @@ impl PageServerConf { no_sync: no_sync.unwrap_or(false), enable_read_path_debugging: enable_read_path_debugging.unwrap_or(false), validate_wal_contiguity: validate_wal_contiguity.unwrap_or(false), - load_previous_heatmap: load_previous_heatmap.unwrap_or(false), - generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(false), + load_previous_heatmap: load_previous_heatmap.unwrap_or(true), + generate_unarchival_heatmap: generate_unarchival_heatmap.unwrap_or(true), }; // ------------------------------------------------------------ diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 8e3277a34a..61e1ec79ad 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1174,15 +1174,6 @@ class NeonEnv: "max_batch_size": 32, } - if config.test_may_use_compatibility_snapshot_binaries: - log.info( - "Skipping prev heatmap settings to avoid forward-compatibility related test failures" - ) - else: - # Look for gaps in WAL received from safekeepeers - ps_cfg["load_previous_heatmap"] = True - ps_cfg["generate_unarchival_heatmap"] = True - get_vectored_concurrent_io = self.pageserver_get_vectored_concurrent_io if get_vectored_concurrent_io is not None: ps_cfg["get_vectored_concurrent_io"] = { From 87e6117dfda0ea49f5b30fed190ad7ad82949bc2 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 7 Mar 2025 17:02:38 +0000 Subject: [PATCH 124/207] storage controller: API-driven graceful migrations (#10913) ## Problem The current migration API does a live 
migration, but if the destination doesn't already have a secondary, that live migration is unlikely to be able to warm up a tenant properly within its timeout (full warmup of a big tenant can take tens of minutes). Background optimisation code knows how to do this gracefully by creating a secondary first, but we don't currently give a human a way to trigger that. Closes: https://github.com/neondatabase/neon/issues/10540 ## Summary of changes - Add `preferred_node` parameter to TenantShard, which is respected by optimize_attachment - Modify migration API to have optional prewarm=true mode, in which we set preferred_node and call optimize_attachment, rather than directly modifying intentstate - Require override_scheduler=true flag if migrating somewhere that is a less-than-optimal scheduling location (e.g. wrong AZ) - Add `origin_node_id` to migration API so that callers can ensure they're moving from where they think they're moving from - Add tests for the above The storcon_cli wrapper for this has a 'watch' mode that waits for eventual cutover. This doesn't show how the warmth of the secondary evolves because we don't currently have an API for that in the controller, as the passthrough API only targets attached locations, not secondaries. It would be straightforward to add later as a dedicated endpoint for getting secondary status, then extend the storcon_cli to consume that and print a nice progress indicator.
--- control_plane/src/storage_controller.rs | 44 +-- control_plane/storcon_cli/src/main.rs | 126 ++++++++- libs/pageserver_api/src/controller_api.rs | 89 +++++- storage_controller/src/service.rs | 255 ++++++++++++++---- storage_controller/src/tenant_shard.rs | 151 ++++++++++- test_runner/fixtures/neon_fixtures.py | 23 +- .../test_storage_controller_scale.py | 6 +- .../regress/test_pageserver_secondary.py | 2 +- .../regress/test_storage_controller.py | 131 ++++++++- 9 files changed, 707 insertions(+), 120 deletions(-) diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 16e12f4e02..3604e4a241 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -12,13 +12,10 @@ use hyper0::Uri; use nix::unistd::Pid; use pageserver_api::controller_api::{ NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest, - TenantCreateResponse, TenantLocateResponse, TenantShardMigrateRequest, - TenantShardMigrateResponse, + TenantCreateResponse, TenantLocateResponse, }; -use pageserver_api::models::{ - TenantShardSplitRequest, TenantShardSplitResponse, TimelineCreateRequest, TimelineInfo, -}; -use pageserver_api::shard::{ShardStripeSize, TenantShardId}; +use pageserver_api::models::{TimelineCreateRequest, TimelineInfo}; +use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api::ResponseErrorMessageExt; use postgres_backend::AuthType; use reqwest::Method; @@ -825,41 +822,6 @@ impl StorageController { .await } - #[instrument(skip(self))] - pub async fn tenant_migrate( - &self, - tenant_shard_id: TenantShardId, - node_id: NodeId, - ) -> anyhow::Result { - self.dispatch( - Method::PUT, - format!("control/v1/tenant/{tenant_shard_id}/migrate"), - Some(TenantShardMigrateRequest { - node_id, - migration_config: None, - }), - ) - .await - } - - #[instrument(skip(self), fields(%tenant_id, %new_shard_count))] - pub async fn tenant_split( - &self, - tenant_id: 
TenantId, - new_shard_count: u8, - new_stripe_size: Option, - ) -> anyhow::Result { - self.dispatch( - Method::PUT, - format!("control/v1/tenant/{tenant_id}/shard_split"), - Some(TenantShardSplitRequest { - new_shard_count, - new_stripe_size, - }), - ) - .await - } - #[instrument(skip_all, fields(node_id=%req.node_id))] pub async fn node_register(&self, req: NodeRegisterRequest) -> anyhow::Result<()> { self.dispatch::<_, ()>(Method::POST, "control/v1/node".to_string(), Some(req)) diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index 2e2c22c791..c3f157a9cc 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -5,12 +5,12 @@ use std::time::Duration; use clap::{Parser, Subcommand}; use futures::StreamExt; use pageserver_api::controller_api::{ - AvailabilityZone, NodeAvailabilityWrapper, NodeConfigureRequest, NodeDescribeResponse, - NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, PlacementPolicy, - SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, ShardSchedulingPolicy, - ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, SkSchedulingPolicy, TenantCreateRequest, - TenantDescribeResponse, TenantPolicyRequest, TenantShardMigrateRequest, - TenantShardMigrateResponse, + AvailabilityZone, MigrationConfig, NodeAvailabilityWrapper, NodeConfigureRequest, + NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, NodeShardResponse, + PlacementPolicy, SafekeeperDescribeResponse, SafekeeperSchedulingPolicyRequest, + ShardSchedulingPolicy, ShardsPreferredAzsRequest, ShardsPreferredAzsResponse, + SkSchedulingPolicy, TenantCreateRequest, TenantDescribeResponse, TenantPolicyRequest, + TenantShardMigrateRequest, TenantShardMigrateResponse, }; use pageserver_api::models::{ EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters, @@ -112,6 +112,15 @@ enum Command { tenant_shard_id: TenantShardId, #[arg(long)] node: NodeId, + 
#[arg(long, default_value_t = true, action = clap::ArgAction::Set)] + prewarm: bool, + #[arg(long, default_value_t = false, action = clap::ArgAction::Set)] + override_scheduler: bool, + }, + /// Watch the location of a tenant shard evolve, e.g. while expecting it to migrate + TenantShardWatch { + #[arg(long)] + tenant_shard_id: TenantShardId, }, /// Migrate the secondary location for a tenant shard to a specific pageserver. TenantShardMigrateSecondary { @@ -619,19 +628,43 @@ async fn main() -> anyhow::Result<()> { Command::TenantShardMigrate { tenant_shard_id, node, + prewarm, + override_scheduler, } => { - let req = TenantShardMigrateRequest { - node_id: node, - migration_config: None, + let migration_config = MigrationConfig { + prewarm, + override_scheduler, + ..Default::default() }; - storcon_client + let req = TenantShardMigrateRequest { + node_id: node, + origin_node_id: None, + migration_config, + }; + + match storcon_client .dispatch::( Method::PUT, format!("control/v1/tenant/{tenant_shard_id}/migrate"), Some(req), ) - .await?; + .await + { + Err(mgmt_api::Error::ApiError(StatusCode::PRECONDITION_FAILED, msg)) => { + anyhow::bail!( + "Migration to {node} rejected, may require `--force` ({}) ", + msg + ); + } + Err(e) => return Err(e.into()), + Ok(_) => {} + } + + watch_tenant_shard(storcon_client, tenant_shard_id, Some(node)).await?; + } + Command::TenantShardWatch { tenant_shard_id } => { + watch_tenant_shard(storcon_client, tenant_shard_id, None).await?; } Command::TenantShardMigrateSecondary { tenant_shard_id, @@ -639,7 +672,8 @@ async fn main() -> anyhow::Result<()> { } => { let req = TenantShardMigrateRequest { node_id: node, - migration_config: None, + origin_node_id: None, + migration_config: MigrationConfig::default(), }; storcon_client @@ -1105,7 +1139,8 @@ async fn main() -> anyhow::Result<()> { format!("control/v1/tenant/{}/migrate", mv.tenant_shard_id), Some(TenantShardMigrateRequest { node_id: mv.to, - migration_config: None, + origin_node_id: 
Some(mv.from), + migration_config: MigrationConfig::default(), }), ) .await @@ -1284,3 +1319,68 @@ async fn main() -> anyhow::Result<()> { Ok(()) } + +static WATCH_INTERVAL: Duration = Duration::from_secs(5); + +async fn watch_tenant_shard( + storcon_client: Client, + tenant_shard_id: TenantShardId, + until_migrated_to: Option, +) -> anyhow::Result<()> { + if let Some(until_migrated_to) = until_migrated_to { + println!( + "Waiting for tenant shard {} to be migrated to node {}", + tenant_shard_id, until_migrated_to + ); + } + + loop { + let desc = storcon_client + .dispatch::<(), TenantDescribeResponse>( + Method::GET, + format!("control/v1/tenant/{}", tenant_shard_id.tenant_id), + None, + ) + .await?; + + // Output the current state of the tenant shard + let shard = desc + .shards + .iter() + .find(|s| s.tenant_shard_id == tenant_shard_id) + .ok_or(anyhow::anyhow!("Tenant shard not found"))?; + let summary = format!( + "attached: {} secondary: {} {}", + shard + .node_attached + .map(|n| format!("{}", n)) + .unwrap_or("none".to_string()), + shard + .node_secondary + .iter() + .map(|n| n.to_string()) + .collect::>() + .join(","), + if shard.is_reconciling { + "(reconciler active)" + } else { + "(reconciler idle)" + } + ); + println!("{}", summary); + + // Maybe drop out if we finished migration + if let Some(until_migrated_to) = until_migrated_to { + if shard.node_attached == Some(until_migrated_to) && !shard.is_reconciling { + println!( + "Tenant shard {} is now on node {}", + tenant_shard_id, until_migrated_to + ); + break; + } + } + + tokio::time::sleep(WATCH_INTERVAL).await; + } + Ok(()) +} diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 2cfe1a85f9..154ab849dd 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -182,20 +182,66 @@ pub struct TenantDescribeResponseShard { #[derive(Serialize, Deserialize, Debug)] pub struct TenantShardMigrateRequest { pub 
node_id: NodeId, + + /// Optionally, callers may specify the node they are migrating _from_, and the server will + /// reject the request if the shard is no longer attached there: this enables writing safer + /// clients that don't risk fighting with some other movement of the shard. #[serde(default)] - pub migration_config: Option, + pub origin_node_id: Option, + + #[serde(default)] + pub migration_config: MigrationConfig, } -#[derive(Serialize, Deserialize, Debug)] +#[derive(Serialize, Deserialize, Debug, PartialEq, Eq)] pub struct MigrationConfig { + /// If true, the migration will be executed even if it is to a location with a sub-optimal scheduling + /// score: this is usually not what you want, and if you use this then you'll also need to set the + /// tenant's scheduling policy to Essential or Pause to avoid the optimiser reverting your migration. + /// + /// Default: false + #[serde(default)] + pub override_scheduler: bool, + + /// If true, the migration will be done gracefully by creating a secondary location first and + /// waiting for it to warm up before cutting over. If false, if there is no existing secondary + /// location at the destination, the tenant will be migrated immediately. If the tenant's data + /// can't be downloaded within [`Self::secondary_warmup_timeout`], then the migration will go + /// ahead but run with a cold cache that can severely reduce performance until it warms up. + /// + /// When doing a graceful migration, the migration API returns as soon as it is started. 
+ /// + /// Default: true + #[serde(default = "default_prewarm")] + pub prewarm: bool, + + /// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait + /// overall for secondary warmup before cutting over #[serde(default)] #[serde(with = "humantime_serde")] pub secondary_warmup_timeout: Option, + /// For non-prewarm migrations which will immediately enter a cutover to the new node: how long to wait + /// within each secondary download poll call to pageserver. #[serde(default)] #[serde(with = "humantime_serde")] pub secondary_download_request_timeout: Option, } +fn default_prewarm() -> bool { + true +} + +impl Default for MigrationConfig { + fn default() -> Self { + Self { + override_scheduler: false, + prewarm: default_prewarm(), + secondary_warmup_timeout: None, + secondary_download_request_timeout: None, + } + } +} + #[derive(Serialize, Clone, Debug)] #[serde(into = "NodeAvailabilityWrapper")] pub enum NodeAvailability { @@ -487,4 +533,43 @@ mod test { err ); } + + /// Check that a minimal migrate request with no config results in the expected default settings + #[test] + fn test_migrate_request_decode_defaults() { + let json = r#"{ + "node_id": 123 + }"#; + + let request: TenantShardMigrateRequest = serde_json::from_str(json).unwrap(); + assert_eq!(request.node_id, NodeId(123)); + assert_eq!(request.origin_node_id, None); + assert!(!request.migration_config.override_scheduler); + assert!(request.migration_config.prewarm); + assert_eq!(request.migration_config.secondary_warmup_timeout, None); + assert_eq!( + request.migration_config.secondary_download_request_timeout, + None + ); + } + + /// Check that a partially specified migration config results in the expected default settings + #[test] + fn test_migration_config_decode_defaults() { + // Specify just one field of the config + let json = r#"{ + }"#; + + let config: MigrationConfig = serde_json::from_str(json).unwrap(); + + // Check each field's expected default 
value + assert!(!config.override_scheduler); + assert!(config.prewarm); + assert_eq!(config.secondary_warmup_timeout, None); + assert_eq!(config.secondary_download_request_timeout, None); + assert_eq!(config.secondary_warmup_timeout, None); + + // Consistency check that the Default impl agrees with our serde defaults + assert_eq!(MigrationConfig::default(), config); + } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index caa2040ce2..6795abf6e9 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -85,7 +85,9 @@ use crate::reconciler::{ attached_location_conf, }; use crate::safekeeper::Safekeeper; -use crate::scheduler::{MaySchedule, ScheduleContext, ScheduleError, ScheduleMode, Scheduler}; +use crate::scheduler::{ + AttachedShardTag, MaySchedule, ScheduleContext, ScheduleError, ScheduleMode, Scheduler, +}; use crate::tenant_shard::{ IntentState, MigrateAttachment, ObservedState, ObservedStateDelta, ObservedStateLocation, ReconcileNeeded, ReconcileResult, ReconcileWaitError, ReconcilerStatus, ReconcilerWaiter, @@ -5299,12 +5301,93 @@ impl Service { Ok((response, waiters)) } + /// A graceful migration: update the preferred node and let optimisation handle the migration + /// in the background (may take a long time as it will fully warm up a location before cutting over) + /// + /// Our external API calls this a 'prewarm=true' migration, but internally it isn't a special prewarm step: it's + /// just a migration that uses the same graceful procedure as our background scheduling optimisations would use. 
+ fn tenant_shard_migrate_with_prewarm( + &self, + migrate_req: &TenantShardMigrateRequest, + shard: &mut TenantShard, + scheduler: &mut Scheduler, + schedule_context: ScheduleContext, + ) -> Result, ApiError> { + shard.set_preferred_node(Some(migrate_req.node_id)); + + // Generate whatever the initial change to the intent is: this could be creation of a secondary, or + // cutting over to an existing secondary. Caller is responsible for validating this before applying it, + // e.g. by checking secondary is warm enough. + Ok(shard.optimize_attachment(scheduler, &schedule_context)) + } + + /// Immediate migration: directly update the intent state and kick off a reconciler + fn tenant_shard_migrate_immediate( + &self, + migrate_req: &TenantShardMigrateRequest, + nodes: &Arc>, + shard: &mut TenantShard, + scheduler: &mut Scheduler, + ) -> Result, ApiError> { + // Non-graceful migration: update the intent state immediately + let old_attached = *shard.intent.get_attached(); + match shard.policy { + PlacementPolicy::Attached(n) => { + // If our new attached node was a secondary, it no longer should be. 
+ shard + .intent + .remove_secondary(scheduler, migrate_req.node_id); + + shard + .intent + .set_attached(scheduler, Some(migrate_req.node_id)); + + // If we were already attached to something, demote that to a secondary + if let Some(old_attached) = old_attached { + if n > 0 { + // Remove other secondaries to make room for the location we'll demote + while shard.intent.get_secondary().len() >= n { + shard.intent.pop_secondary(scheduler); + } + + shard.intent.push_secondary(scheduler, old_attached); + } + } + } + PlacementPolicy::Secondary => { + shard.intent.clear(scheduler); + shard.intent.push_secondary(scheduler, migrate_req.node_id); + } + PlacementPolicy::Detached => { + return Err(ApiError::BadRequest(anyhow::anyhow!( + "Cannot migrate a tenant that is PlacementPolicy::Detached: configure it to an attached policy first" + ))); + } + } + + tracing::info!("Migrating: new intent {:?}", shard.intent); + shard.sequence = shard.sequence.next(); + shard.set_preferred_node(None); // Abort any in-flight graceful migration + Ok(self.maybe_configured_reconcile_shard( + shard, + nodes, + (&migrate_req.migration_config).into(), + )) + } + pub(crate) async fn tenant_shard_migrate( &self, tenant_shard_id: TenantShardId, migrate_req: TenantShardMigrateRequest, ) -> Result { - let waiter = { + // Depending on whether the migration is a change and whether it's graceful or immediate, we might + // get a different outcome to handle + enum MigrationOutcome { + Optimization(Option), + Reconcile(Option), + } + + let outcome = { let mut locked = self.inner.write().unwrap(); let (nodes, tenants, scheduler) = locked.parts_mut(); @@ -5315,71 +5398,139 @@ impl Service { ))); }; + // Migration to unavavailable node requires force flag if !node.is_available() { - // Warn but proceed: the caller may intend to manually adjust the placement of - // a shard even if the node is down, e.g. if intervening during an incident. 
- tracing::warn!("Migrating to unavailable node {node}"); + if migrate_req.migration_config.override_scheduler { + // Warn but proceed: the caller may intend to manually adjust the placement of + // a shard even if the node is down, e.g. if intervening during an incident. + tracing::warn!("Forcibly migrating to unavailable node {node}"); + } else { + tracing::warn!("Node {node} is unavailable, refusing migration"); + return Err(ApiError::PreconditionFailed( + format!("Node {node} is unavailable").into_boxed_str(), + )); + } } + // Calculate the ScheduleContext for this tenant + let mut schedule_context = ScheduleContext::default(); + for (_shard_id, shard) in + tenants.range(TenantShardId::tenant_range(tenant_shard_id.tenant_id)) + { + schedule_context.avoid(&shard.intent.all_pageservers()); + } + + // Look up the specific shard we will migrate let Some(shard) = tenants.get_mut(&tenant_shard_id) else { return Err(ApiError::NotFound( anyhow::anyhow!("Tenant shard not found").into(), )); }; + // Migration to a node with unfavorable scheduling score requires a force flag, because it might just + // be migrated back by the optimiser. 
+ if let Some(better_node) = shard.find_better_location::( + scheduler, + &schedule_context, + migrate_req.node_id, + &[], + ) { + if !migrate_req.migration_config.override_scheduler { + return Err(ApiError::PreconditionFailed( + "Migration to a worse-scoring node".into(), + )); + } else { + tracing::info!( + "Migrating to a worse-scoring node {} (optimiser would prefer {better_node})", + migrate_req.node_id + ); + } + } + + if let Some(origin_node_id) = migrate_req.origin_node_id { + if shard.intent.get_attached() != &Some(origin_node_id) { + return Err(ApiError::PreconditionFailed( + format!( + "Migration expected to originate from {} but shard is on {:?}", + origin_node_id, + shard.intent.get_attached() + ) + .into(), + )); + } + } + if shard.intent.get_attached() == &Some(migrate_req.node_id) { // No-op case: we will still proceed to wait for reconciliation in case it is // incomplete from an earlier update to the intent. tracing::info!("Migrating: intent is unchanged {:?}", shard.intent); + + // An instruction to migrate to the currently attached node should + // cancel any pending graceful migration + shard.set_preferred_node(None); + + MigrationOutcome::Reconcile(self.maybe_configured_reconcile_shard( + shard, + nodes, + (&migrate_req.migration_config).into(), + )) + } else if migrate_req.migration_config.prewarm { + MigrationOutcome::Optimization(self.tenant_shard_migrate_with_prewarm( + &migrate_req, + shard, + scheduler, + schedule_context, + )?) } else { - let old_attached = *shard.intent.get_attached(); - - match shard.policy { - PlacementPolicy::Attached(n) => { - // If our new attached node was a secondary, it no longer should be. 
- shard - .intent - .remove_secondary(scheduler, migrate_req.node_id); - - shard - .intent - .set_attached(scheduler, Some(migrate_req.node_id)); - - // If we were already attached to something, demote that to a secondary - if let Some(old_attached) = old_attached { - if n > 0 { - // Remove other secondaries to make room for the location we'll demote - while shard.intent.get_secondary().len() >= n { - shard.intent.pop_secondary(scheduler); - } - - shard.intent.push_secondary(scheduler, old_attached); - } - } - } - PlacementPolicy::Secondary => { - shard.intent.clear(scheduler); - shard.intent.push_secondary(scheduler, migrate_req.node_id); - } - PlacementPolicy::Detached => { - return Err(ApiError::BadRequest(anyhow::anyhow!( - "Cannot migrate a tenant that is PlacementPolicy::Detached: configure it to an attached policy first" - ))); - } - } - - tracing::info!("Migrating: new intent {:?}", shard.intent); - shard.sequence = shard.sequence.next(); + MigrationOutcome::Reconcile(self.tenant_shard_migrate_immediate( + &migrate_req, + nodes, + shard, + scheduler, + )?) } - - let reconciler_config = match migrate_req.migration_config { - Some(cfg) => (&cfg).into(), - None => ReconcilerConfig::new(ReconcilerPriority::High), - }; - - self.maybe_configured_reconcile_shard(shard, nodes, reconciler_config) }; + // We may need to validate + apply an optimisation, or we may need to just retrive a reconcile waiter + let waiter = match outcome { + MigrationOutcome::Optimization(Some(optimization)) => { + // Validate and apply the optimization -- this would happen anyway in background reconcile loop, but + // we might as well do it more promptly as this is a direct external request. 
+ let mut validated = self + .optimize_all_validate(vec![(tenant_shard_id, optimization)]) + .await; + if let Some((_shard_id, optimization)) = validated.pop() { + let mut locked = self.inner.write().unwrap(); + let (nodes, tenants, scheduler) = locked.parts_mut(); + let Some(shard) = tenants.get_mut(&tenant_shard_id) else { + // Rare but possible: tenant is removed between generating optimisation and validating it. + return Err(ApiError::NotFound( + anyhow::anyhow!("Tenant shard not found").into(), + )); + }; + + if !shard.apply_optimization(scheduler, optimization) { + // This can happen but is unusual enough to warn on: something else changed in the shard that made the optimisation stale + // and therefore not applied. + tracing::warn!( + "Schedule optimisation generated during graceful migration was not applied, shard changed?" + ); + } + self.maybe_configured_reconcile_shard( + shard, + nodes, + (&migrate_req.migration_config).into(), + ) + } else { + None + } + } + MigrationOutcome::Optimization(None) => None, + MigrationOutcome::Reconcile(waiter) => waiter, + }; + + // Finally, wait for any reconcile we started to complete. In the case of immediate-mode migrations to cold + // locations, this has a good chance of timing out. 
if let Some(waiter) = waiter { waiter.wait_timeout(RECONCILE_TIMEOUT).await?; } else { @@ -6959,6 +7110,10 @@ impl Service { ShardSchedulingPolicy::Active => { // Ok to do optimization } + ShardSchedulingPolicy::Essential if shard.get_preferred_node().is_some() => { + // Ok to do optimization: we are executing a graceful migration that + // has set preferred_node + } ShardSchedulingPolicy::Essential | ShardSchedulingPolicy::Pause | ShardSchedulingPolicy::Stop => { diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 27e478043e..96ff70a951 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -132,6 +132,10 @@ pub(crate) struct TenantShard { /// of state that we publish externally in an eventually consistent way. pub(crate) pending_compute_notification: bool, + /// To do a graceful migration, set this field to the destination pageserver, and optimization + /// functions will consider this node the best location and react appropriately. + preferred_node: Option, + // Support/debug tool: if something is going wrong or flapping with scheduling, this may // be set to a non-active state to avoid making changes while the issue is fixed. scheduling_policy: ShardSchedulingPolicy, @@ -555,6 +559,7 @@ impl TenantShard { last_error: Arc::default(), pending_compute_notification: false, scheduling_policy: ShardSchedulingPolicy::default(), + preferred_node: None, } } @@ -809,6 +814,15 @@ impl TenantShard { return None; }; + // If the candidate is our preferred node, then it is better than the current location, as long + // as it is online -- the online check is part of the score calculation we did above, so it's + // important that this check comes after that one. 
+ if let Some(preferred) = self.preferred_node.as_ref() { + if preferred == &candidate { + return Some(true); + } + } + match scheduler.compute_node_score::( current, &self.intent.preferred_az_id, @@ -847,13 +861,22 @@ impl TenantShard { } } - fn find_better_location( + pub(crate) fn find_better_location( &self, scheduler: &mut Scheduler, schedule_context: &ScheduleContext, current: NodeId, hard_exclude: &[NodeId], ) -> Option { + // If we have a migration hint, then that is our better location + if let Some(hint) = self.preferred_node.as_ref() { + if hint == &current { + return None; + } + + return Some(*hint); + } + // Look for a lower-scoring location to attach to let Ok(candidate_node) = scheduler.schedule_shard::( hard_exclude, @@ -887,6 +910,13 @@ impl TenantShard { scheduler: &mut Scheduler, schedule_context: &ScheduleContext, ) -> bool { + // Tenant with preferred node: check if it is not already at the preferred node + if let Some(preferred) = self.preferred_node.as_ref() { + if Some(preferred) != self.intent.get_attached().as_ref() { + return true; + } + } + // Sharded tenant: check if any locations have a nonzero affinity score if self.shard.count >= ShardCount(1) { let schedule_context = schedule_context.project_detach(self); @@ -927,6 +957,9 @@ impl TenantShard { /// Optimize attachments: if a shard has a secondary location that is preferable to /// its primary location based on soft constraints, switch that secondary location /// to be attached.
+ /// + /// `schedule_context` should have been populated with all shards in the tenant, including + /// the one we're trying to optimize (this function will subtract its own contribution before making scoring decisions) #[instrument(skip_all, fields(tenant_id=%self.tenant_shard_id.tenant_id, shard_id=%self.tenant_shard_id.shard_slug()))] pub(crate) fn optimize_attachment( &self, @@ -1055,7 +1088,8 @@ impl TenantShard { // // This should be a transient state, there should always be capacity eventually in our preferred AZ (even if nodes // there are too overloaded for scheduler to suggest them, more should be provisioned eventually). - if self.intent.preferred_az_id.is_some() + if self.preferred_node.is_none() + && self.intent.preferred_az_id.is_some() && scheduler.get_node_az(&replacement) != self.intent.preferred_az_id { tracing::debug!( @@ -1161,6 +1195,27 @@ impl TenantShard { None } + /// Start or abort a graceful migration of this shard to another pageserver. This works on top of the + /// other optimisation functions, to bias them to move to the destination node. + pub(crate) fn set_preferred_node(&mut self, node: Option) { + if let Some(hint) = self.preferred_node.as_ref() { + if Some(hint) != node.as_ref() { + // This is legal but a bit surprising: we expect that administrators wouldn't usually + // change their mind about where to migrate something. 
+ tracing::warn!( + "Changing migration destination from {hint} to {node:?} (current intent {:?})", + self.intent + ); + } + } + + self.preferred_node = node; + } + + pub(crate) fn get_preferred_node(&self) -> Option { + self.preferred_node + } + /// Return true if the optimization was really applied: it will not be applied if the optimization's /// sequence is behind this tenant shard's pub(crate) fn apply_optimization( @@ -1185,6 +1240,14 @@ impl TenantShard { self.intent.demote_attached(scheduler, old_attached_node_id); self.intent .promote_attached(scheduler, new_attached_node_id); + + if let Some(hint) = self.preferred_node.as_ref() { + if hint == &new_attached_node_id { + // The migration target is not a long term pin: once we are done with the migration, clear it. + tracing::info!("Graceful migration to {hint} complete"); + self.preferred_node = None; + } + } } ScheduleOptimizationAction::ReplaceSecondary(ReplaceSecondary { old_node_id, @@ -1703,6 +1766,10 @@ impl TenantShard { debug_assert!(!self.intent.all_pageservers().contains(&node_id)); + if self.preferred_node == Some(node_id) { + self.preferred_node = None; + } + intent_modified } @@ -1750,6 +1817,7 @@ impl TenantShard { pending_compute_notification: false, delayed_reconcile: false, scheduling_policy: serde_json::from_str(&tsp.scheduling_policy).unwrap(), + preferred_node: None, }) } @@ -2270,6 +2338,85 @@ pub(crate) mod tests { Ok(()) } + #[test] + /// How the optimisation code handles a shard with a preferred node set; this is an example + /// of the multi-step migration, but driven by a different input. 
+ fn optimize_attachment_multi_preferred_node() -> anyhow::Result<()> { + let nodes = make_test_nodes( + 4, + &[ + AvailabilityZone("az-a".to_string()), + AvailabilityZone("az-a".to_string()), + AvailabilityZone("az-b".to_string()), + AvailabilityZone("az-b".to_string()), + ], + ); + let mut scheduler = Scheduler::new(nodes.values()); + + // Two shards of a tenant that wants to be in AZ A + let mut shard_a = make_test_tenant_shard(PlacementPolicy::Attached(1)); + shard_a.intent.preferred_az_id = Some(AvailabilityZone("az-a".to_string())); + + // Initially attached in a stable location + shard_a.intent.set_attached(&mut scheduler, Some(NodeId(1))); + shard_a.intent.push_secondary(&mut scheduler, NodeId(3)); + + // Set the preferred node to node 2, an equally high scoring node to its current location + shard_a.preferred_node = Some(NodeId(2)); + + fn make_schedule_context(shard_a: &TenantShard) -> ScheduleContext { + let mut schedule_context = ScheduleContext::default(); + schedule_context.avoid(&shard_a.intent.all_pageservers()); + schedule_context + } + + let schedule_context = make_schedule_context(&shard_a); + let optimization_a_prepare = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization_a_prepare, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::CreateSecondary(NodeId(2)) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization_a_prepare.unwrap()); + + // The first step of the optimisation should not have cleared the preferred node + assert_eq!(shard_a.preferred_node, Some(NodeId(2))); + + let schedule_context = make_schedule_context(&shard_a); + let optimization_a_migrate = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization_a_migrate, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::MigrateAttachment(MigrateAttachment { + old_attached_node_id: NodeId(1), + 
new_attached_node_id: NodeId(2) + }) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization_a_migrate.unwrap()); + + // The cutover step of the optimisation should have cleared the preferred node + assert_eq!(shard_a.preferred_node, None); + + let schedule_context = make_schedule_context(&shard_a); + let optimization_a_cleanup = shard_a.optimize_attachment(&mut scheduler, &schedule_context); + assert_eq!( + optimization_a_cleanup, + Some(ScheduleOptimization { + sequence: shard_a.sequence, + action: ScheduleOptimizationAction::RemoveSecondary(NodeId(1)) + }) + ); + shard_a.apply_optimization(&mut scheduler, optimization_a_cleanup.unwrap()); + + shard_a.intent.clear(&mut scheduler); + + Ok(()) + } + #[test] /// Check that multi-step migration works when moving to somewhere that is only better by /// 1 AffinityScore -- this ensures that we don't have a bug like the intermediate secondary diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 61e1ec79ad..4d2b3587e8 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1715,8 +1715,12 @@ class StorageControllerLeadershipStatus(StrEnum): @dataclass class StorageControllerMigrationConfig: - secondary_warmup_timeout: str | None - secondary_download_request_timeout: str | None + # Unlike the API itself, tests default to prewarm=False because it's a simpler API and doesn't + # require the test to go poll for the migration actually completing. 
+ prewarm: bool = False + override_scheduler: bool = False + secondary_warmup_timeout: str | None = None + secondary_download_request_timeout: str | None = None class NeonStorageController(MetricsGetter, LogUtils): @@ -2120,8 +2124,10 @@ class NeonStorageController(MetricsGetter, LogUtils): config: StorageControllerMigrationConfig | None = None, ): payload = {"tenant_shard_id": str(tenant_shard_id), "node_id": dest_ps_id} - if config is not None: - payload["migration_config"] = dataclasses.asdict(config) + if config is None: + config = StorageControllerMigrationConfig() + + payload["migration_config"] = dataclasses.asdict(config) self.request( "PUT", @@ -2129,8 +2135,13 @@ class NeonStorageController(MetricsGetter, LogUtils): json=payload, headers=self.headers(TokenScope.ADMIN), ) - log.info(f"Migrated tenant {tenant_shard_id} to pageserver {dest_ps_id}") - assert self.env.get_tenant_pageserver(tenant_shard_id).id == dest_ps_id + if config.prewarm: + log.info( + f"Started prewarm migration of tenant {tenant_shard_id} to pageserver {dest_ps_id}" + ) + else: + log.info(f"Migrated tenant {tenant_shard_id} to pageserver {dest_ps_id}") + assert self.env.get_tenant_pageserver(tenant_shard_id).id == dest_ps_id def tenant_policy_update(self, tenant_id: TenantId, body: dict[str, Any]): log.info(f"tenant_policy_update({tenant_id}, {body})") diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index d45db28c78..777b9e2870 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -16,6 +16,7 @@ from fixtures.neon_fixtures import ( NeonPageserver, PageserverAvailability, PageserverSchedulingPolicy, + StorageControllerMigrationConfig, ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pg_version import PgVersion @@ -362,7 +363,10 @@ def test_storage_controller_many_tenants( 
dest_ps_id = desc["shards"][shard_number]["node_secondary"][0] f = executor.submit( - env.storage_controller.tenant_shard_migrate, tenant_shard_id, dest_ps_id + env.storage_controller.tenant_shard_migrate, + tenant_shard_id, + dest_ps_id, + StorageControllerMigrationConfig(prewarm=False, override_scheduler=True), ) elif op == Operation.TENANT_PASSTHROUGH: # A passthrough read to shard zero diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index b9e2934505..130db009c9 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -976,7 +976,7 @@ def test_migration_to_cold_secondary(neon_env_builder: NeonEnvBuilder): # We can't hydrate everything anyway because of the failpoints. # Implicitly, this also uploads a heatmap from the current attached location. config = StorageControllerMigrationConfig( - secondary_warmup_timeout="5s", secondary_download_request_timeout="2s" + secondary_warmup_timeout="5s", secondary_download_request_timeout="2s", prewarm=False ) env.storage_controller.tenant_shard_migrate( TenantShardId(tenant_id, shard_number=0, shard_count=0), ps_secondary.id, config diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index d5acc257b2..b5572ce6a1 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -26,6 +26,7 @@ from fixtures.neon_fixtures import ( PgBin, StorageControllerApiException, StorageControllerLeadershipStatus, + StorageControllerMigrationConfig, last_flush_lsn_upload, ) from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient @@ -765,7 +766,10 @@ def test_storage_controller_stuck_compute_hook( # status is cleared. 
handle_params["status"] = 423 migrate_fut = executor.submit( - env.storage_controller.tenant_shard_migrate, shard_0_id, dest_ps_id + env.storage_controller.tenant_shard_migrate, + shard_0_id, + dest_ps_id, + config=StorageControllerMigrationConfig(prewarm=False, override_scheduler=True), ) def logged_stuck(): @@ -793,7 +797,10 @@ def test_storage_controller_stuck_compute_hook( # Now, do a migration in the opposite direction handle_params["status"] = 423 migrate_fut = executor.submit( - env.storage_controller.tenant_shard_migrate, shard_0_id, origin_pageserver.id + env.storage_controller.tenant_shard_migrate, + shard_0_id, + origin_pageserver.id, + config=StorageControllerMigrationConfig(prewarm=False, override_scheduler=True), ) def logged_stuck_again(): @@ -1027,7 +1034,11 @@ def test_storage_controller_compute_hook_revert( with pytest.raises(StorageControllerApiException, match="Timeout waiting for shard"): # We expect the controller to give us an error because its reconciliation timed out # waiting for the compute hook. - env.storage_controller.tenant_shard_migrate(tenant_shard_id, pageserver_b.id) + env.storage_controller.tenant_shard_migrate( + tenant_shard_id, + pageserver_b.id, + config=StorageControllerMigrationConfig(prewarm=False, override_scheduler=True), + ) # Although the migration API failed, the hook should still see pageserver B (it remembers what # was posted even when returning an error code) @@ -1068,7 +1079,11 @@ def test_storage_controller_compute_hook_revert( # Migrate B -> A, with a working compute hook: the controller should notify the hook because the # last update it made that was acked (423) by the compute was for node B. 
handle_params["status"] = 200 - env.storage_controller.tenant_shard_migrate(tenant_shard_id, pageserver_a.id) + env.storage_controller.tenant_shard_migrate( + tenant_shard_id, + pageserver_a.id, + config=StorageControllerMigrationConfig(prewarm=False, override_scheduler=True), + ) wait_until(lambda: notified_ps(pageserver_a.id)) @@ -1949,6 +1964,9 @@ def test_storcon_cli(neon_env_builder: NeonEnvBuilder): env.storage_controller.tenant_describe(tenant_id)["shards"][0]["node_attached"] ) ), + # A simple migration where we will ignore scheduling (force=true) and do it immediately (prewarm=false) + "--prewarm=false", + "--override-scheduler=true", ] ) @@ -3865,3 +3883,108 @@ def test_storage_controller_location_conf_equivalence(neon_env_builder: NeonEnvB ) assert reconciles_after_restart == 0 + + +@pytest.mark.parametrize("wrong_az", [True, False]) +def test_storage_controller_graceful_migration(neon_env_builder: NeonEnvBuilder, wrong_az: bool): + """ + Test that the graceful migration API goes through the process of + creating a secondary & waiting for it to warm up before cutting over, when + we use the prewarm=True flag to the API. 
+ """ + + # 2 pageservers in 2 AZs, so that each AZ has a pageserver we can migrate to + neon_env_builder.num_pageservers = 4 + neon_env_builder.num_azs = 2 + + env = neon_env_builder.init_start() + + # Enable secondary location (neon_local disables by default) + env.storage_controller.tenant_policy_update(env.initial_tenant, {"placement": {"Attached": 1}}) + env.storage_controller.reconcile_until_idle() + + initial_desc = env.storage_controller.tenant_describe(env.initial_tenant)["shards"][0] + initial_ps_id = initial_desc["node_attached"] + initial_secondary_id = initial_desc["node_secondary"][0] + initial_ps_az = initial_desc["preferred_az_id"] + initial_ps = [ps for ps in env.pageservers if ps.id == initial_ps_id][0] + + if wrong_az: + dest_ps = [ + ps + for ps in env.pageservers + if ps.id != initial_ps_id + and ps.az_id != initial_ps_az + and ps.id != initial_secondary_id + ][0] + else: + dest_ps = [ + ps + for ps in env.pageservers + if ps.id != initial_ps_id + and ps.az_id == initial_ps_az + and ps.id != initial_secondary_id + ][0] + + log.info( + f"Migrating to {dest_ps.id} in AZ {dest_ps.az_id} (from {initial_ps_id} in AZ {initial_ps_az})" + ) + dest_ps_id = dest_ps.id + + # Set a failpoint so that the migration will block at the point it has a secondary location + for ps in env.pageservers: + ps.http_client().configure_failpoints(("secondary-layer-download-pausable", "pause")) + + # Before migration, our destination has no locations. Guaranteed because any secondary for our + # tenant will be in another AZ. 
+ assert dest_ps.http_client().tenant_list_locations()["tenant_shards"] == [] + + if wrong_az: + # If migrating to the wrong AZ, first check that omitting force flag results in rejection + with pytest.raises(StorageControllerApiException, match="worse-scoring node"): + env.storage_controller.tenant_shard_migrate( + TenantShardId(env.initial_tenant, 0, 0), + dest_ps_id, + config=StorageControllerMigrationConfig(prewarm=True, override_scheduler=False), + ) + + # Turn off ordinary optimisations so that our migration will stay put once complete + env.storage_controller.tenant_policy_update(env.initial_tenant, {"scheduling": "Essential"}) + + # We expect this API call to succeed, and result in a new secondary location on the destination + env.storage_controller.tenant_shard_migrate( + TenantShardId(env.initial_tenant, 0, 0), + dest_ps_id, + config=StorageControllerMigrationConfig(prewarm=True, override_scheduler=wrong_az), + ) + + def secondary_at_dest(): + locs = dest_ps.http_client().tenant_list_locations()["tenant_shards"] + assert len(locs) == 1 + assert locs[0][0] == str(env.initial_tenant) + assert locs[0][1]["mode"] == "Secondary" + + wait_until(secondary_at_dest) + + # Unblock secondary downloads + for ps in env.pageservers: + ps.http_client().configure_failpoints(("secondary-layer-download-pausable", "off")) + + # Pump the reconciler to avoid waiting for background reconciles + env.storage_controller.reconcile_until_idle() + + # We should be attached at the destination + locs = dest_ps.http_client().tenant_list_locations()["tenant_shards"] + assert len(locs) == 1 + assert locs[0][1]["mode"] == "AttachedSingle" + + # Nothing left behind at the origin + if wrong_az: + # We're in essential scheduling mode, so the end state should be attached in the migration + # destination and a secondary in the original location + assert ( + initial_ps.http_client().tenant_list_locations()["tenant_shards"][0][1]["mode"] + == "Secondary" + ) + else: + assert 
initial_ps.http_client().tenant_list_locations()["tenant_shards"] == [] From e876794ce578111d915c492d5abfa47360e6ee82 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 7 Mar 2025 21:22:47 +0400 Subject: [PATCH 125/207] storcon: use https safekeeper api (#11065) ## Problem Storage controller uses http for requests to safekeeper management API. Closes: https://github.com/neondatabase/cloud/issues/24835 ## Summary of changes - Add `use_https_safekeeper_api` option to storcon to use https api - Use https for requests to safekeeper management API if this option is enabled - Add `ssl_ca_file` option to storcon for ability to specify custom root CA certificate --- libs/pageserver_api/src/controller_api.rs | 1 + safekeeper/client/src/mgmt_api.rs | 20 +++-- safekeeper/src/pull_timeline.rs | 12 ++- .../down.sql | 1 + .../up.sql | 1 + storage_controller/src/heartbeater.rs | 8 ++ storage_controller/src/main.rs | 27 ++++++- storage_controller/src/node.rs | 12 ++- storage_controller/src/persistence.rs | 6 ++ storage_controller/src/safekeeper.rs | 77 +++++++++++++++---- storage_controller/src/safekeeper_client.rs | 14 +--- storage_controller/src/schema.rs | 1 + storage_controller/src/service.rs | 56 ++++++++++---- .../regress/test_storage_controller.py | 20 +++++ 14 files changed, 193 insertions(+), 63 deletions(-) create mode 100644 storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/down.sql create mode 100644 storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/up.sql diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 154ab849dd..3cb62f9d18 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -489,6 +489,7 @@ pub struct SafekeeperDescribeResponse { pub host: String, pub port: i32, pub http_port: i32, + pub https_port: Option, pub availability_zone_id: String, pub scheduling_policy: 
SkSchedulingPolicy, } diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs index 0e92e87103..3966aa811f 100644 --- a/safekeeper/client/src/mgmt_api.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -37,6 +37,10 @@ pub enum Error { #[error("Cancelled")] Cancelled, + + /// Failed to create client. + #[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())] + CreateClient(reqwest::Error), } pub type Result = std::result::Result; @@ -64,11 +68,7 @@ impl ResponseErrorMessageExt for reqwest::Response { } impl Client { - pub fn new(mgmt_api_endpoint: String, jwt: Option) -> Self { - Self::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt) - } - - pub fn from_client( + pub fn new( client: reqwest::Client, mgmt_api_endpoint: String, jwt: Option, @@ -172,12 +172,10 @@ impl Client { uri: U, body: B, ) -> Result { - let req = self.client.request(method, uri); - let req = if let Some(value) = &self.authorization_header { - req.header(reqwest::header::AUTHORIZATION, value.get_contents()) - } else { - req - }; + let mut req = self.client.request(method, uri); + if let Some(value) = &self.authorization_header { + req = req.header(reqwest::header::AUTHORIZATION, value.get_contents()) + } req.json(&body).send().await.map_err(Error::ReceiveBody) } } diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index fc58b8509a..7d6ce1269c 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -402,12 +402,16 @@ pub async fn handle_request( bail!("Timeline {} already exists", request.timeline_id); } + // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's + // part of https support (#24836). + let http_client = reqwest::Client::new(); + let http_hosts = request.http_hosts.clone(); // Figure out statuses of potential donors. 
let responses: Vec> = futures::future::join_all(http_hosts.iter().map(|url| async { - let cclient = Client::new(url.clone(), sk_auth_token.clone()); + let cclient = Client::new(http_client.clone(), url.clone(), sk_auth_token.clone()); let info = cclient .timeline_status(request.tenant_id, request.timeline_id) .await?; @@ -460,8 +464,10 @@ async fn pull_timeline( let conf = &global_timelines.get_global_config(); let (_tmp_dir, tli_dir_path) = create_temp_timeline_dir(conf, ttid).await?; - - let client = Client::new(host.clone(), sk_auth_token.clone()); + // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's + // part of https support (#24836). + let http_client = reqwest::Client::new(); + let client = Client::new(http_client, host.clone(), sk_auth_token.clone()); // Request stream with basebackup archive. let bb_resp = client .snapshot(status.tenant_id, status.timeline_id, conf.my_id) diff --git a/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/down.sql b/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/down.sql new file mode 100644 index 0000000000..378e9f8c16 --- /dev/null +++ b/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/down.sql @@ -0,0 +1 @@ +ALTER TABLE safekeepers DROP https_port; diff --git a/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/up.sql b/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/up.sql new file mode 100644 index 0000000000..bb47b0b256 --- /dev/null +++ b/storage_controller/migrations/2025-02-28-141741_safekeeper_use_https/up.sql @@ -0,0 +1 @@ +ALTER TABLE safekeepers ADD https_port INTEGER; diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs index 56a331becd..dab6799d3e 100644 --- a/storage_controller/src/heartbeater.rs +++ b/storage_controller/src/heartbeater.rs @@ -8,6 +8,7 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use 
pageserver_api::controller_api::{NodeAvailability, SkSchedulingPolicy}; use pageserver_api::models::PageserverUtilization; +use reqwest::Certificate; use safekeeper_api::models::SafekeeperUtilization; use safekeeper_client::mgmt_api; use thiserror::Error; @@ -27,6 +28,7 @@ struct HeartbeaterTask { max_offline_interval: Duration, max_warming_up_interval: Duration, jwt_token: Option, + ssl_ca_cert: Option, } #[derive(Debug, Clone)] @@ -75,6 +77,7 @@ where { pub(crate) fn new( jwt_token: Option, + ssl_ca_cert: Option, max_offline_interval: Duration, max_warming_up_interval: Duration, cancel: CancellationToken, @@ -84,6 +87,7 @@ where let mut heartbeater = HeartbeaterTask::new( receiver, jwt_token, + ssl_ca_cert, max_offline_interval, max_warming_up_interval, cancel, @@ -119,6 +123,7 @@ where fn new( receiver: tokio::sync::mpsc::UnboundedReceiver>, jwt_token: Option, + ssl_ca_cert: Option, max_offline_interval: Duration, max_warming_up_interval: Duration, cancel: CancellationToken, @@ -130,6 +135,7 @@ where max_offline_interval, max_warming_up_interval, jwt_token, + ssl_ca_cert, } } async fn run(&mut self) { @@ -325,6 +331,7 @@ impl HeartBeat for HeartbeaterTask for HeartbeaterTask, - // Maximum acceptable lag for the secondary location while draining - // a pageserver + /// Maximum acceptable lag for the secondary location while draining + /// a pageserver #[arg(long)] max_secondary_lag_bytes: Option, - // Period with which to send heartbeats to registered nodes + /// Period with which to send heartbeats to registered nodes #[arg(long)] heartbeat_interval: Option, #[arg(long)] long_reconcile_threshold: Option, - // Flag to use https for requests to pageserver API. + /// Flag to use https for requests to pageserver API. #[arg(long, default_value = "false")] use_https_pageserver_api: bool, + /// Flag to use https for requests to safekeeper API. 
+ #[arg(long, default_value = "false")] + use_https_safekeeper_api: bool, + + /// Trusted root CA certificate to use in https APIs. + #[arg(long)] + ssl_ca_file: Option, } enum StrictMode { @@ -315,6 +323,15 @@ async fn async_main() -> anyhow::Result<()> { } } + let ssl_ca_cert = match args.ssl_ca_file.as_ref() { + Some(ssl_ca_file) => { + tracing::info!("Using ssl root CA file: {ssl_ca_file:?}"); + let buf = tokio::fs::read(ssl_ca_file).await?; + Some(Certificate::from_pem(&buf)?) + } + None => None, + }; + let config = Config { pageserver_jwt_token: secrets.pageserver_jwt_token, safekeeper_jwt_token: secrets.safekeeper_jwt_token, @@ -351,6 +368,8 @@ async fn async_main() -> anyhow::Result<()> { start_as_candidate: args.start_as_candidate, http_service_port: args.listen.port() as i32, use_https_pageserver_api: args.use_https_pageserver_api, + use_https_safekeeper_api: args.use_https_safekeeper_api, + ssl_ca_cert, }; // Validate that we can connect to the database diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index bc7fe8802a..735bae2123 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -1,7 +1,6 @@ use std::str::FromStr; use std::time::Duration; -use anyhow::anyhow; use pageserver_api::controller_api::{ AvailabilityZone, NodeAvailability, NodeDescribeResponse, NodeRegisterRequest, NodeSchedulingPolicy, TenantLocateResponseShard, @@ -211,7 +210,10 @@ impl Node { use_https: bool, ) -> anyhow::Result { if use_https && listen_https_port.is_none() { - return Err(anyhow!("https is enabled, but node has no https port")); + anyhow::bail!( + "cannot create node {id}: \ + https is enabled, but https port is not specified" + ); } Ok(Self { @@ -244,7 +246,11 @@ impl Node { pub(crate) fn from_persistent(np: NodePersistence, use_https: bool) -> anyhow::Result { if use_https && np.listen_https_port.is_none() { - return Err(anyhow!("https is enabled, but node has no https port")); + anyhow::bail!( + "cannot load 
node {} from persistent: \ + https is enabled, but https port is not specified", + np.node_id, + ); } Ok(Self { diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 2e80b48859..939b8c6cd8 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -1559,6 +1559,7 @@ pub(crate) struct SafekeeperPersistence { pub(crate) http_port: i32, pub(crate) availability_zone_id: String, pub(crate) scheduling_policy: SkSchedulingPolicyFromSql, + pub(crate) https_port: Option, } /// Wrapper struct around [`SkSchedulingPolicy`] because both it and [`FromSql`] are from foreign crates, @@ -1599,6 +1600,7 @@ impl SafekeeperPersistence { host: upsert.host, port: upsert.port, http_port: upsert.http_port, + https_port: upsert.https_port, availability_zone_id: upsert.availability_zone_id, scheduling_policy: SkSchedulingPolicyFromSql(scheduling_policy), } @@ -1611,6 +1613,7 @@ impl SafekeeperPersistence { host: self.host.clone(), port: self.port, http_port: self.http_port, + https_port: self.https_port, availability_zone_id: self.availability_zone_id.clone(), scheduling_policy: self.scheduling_policy.0, }) @@ -1631,6 +1634,7 @@ pub(crate) struct SafekeeperUpsert { /// The active flag will not be stored in the database and will be ignored. pub(crate) active: Option, pub(crate) http_port: i32, + pub(crate) https_port: Option, pub(crate) availability_zone_id: String, } @@ -1646,6 +1650,7 @@ impl SafekeeperUpsert { host: &self.host, port: self.port, http_port: self.http_port, + https_port: self.https_port, availability_zone_id: &self.availability_zone_id, // None means a wish to not update this column. We expose abilities to update it via other means. 
scheduling_policy: None, @@ -1662,6 +1667,7 @@ struct InsertUpdateSafekeeper<'a> { host: &'a str, port: i32, http_port: i32, + https_port: Option, availability_zone_id: &'a str, scheduling_policy: Option<&'a str>, } diff --git a/storage_controller/src/safekeeper.rs b/storage_controller/src/safekeeper.rs index 9c7e6e0894..16f72ef4bc 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -1,7 +1,7 @@ use std::time::Duration; use pageserver_api::controller_api::{SafekeeperDescribeResponse, SkSchedulingPolicy}; -use reqwest::StatusCode; +use reqwest::{Certificate, StatusCode}; use safekeeper_client::mgmt_api; use tokio_util::sync::CancellationToken; use utils::backoff; @@ -18,26 +18,55 @@ pub struct Safekeeper { cancel: CancellationToken, listen_http_addr: String, listen_http_port: u16, + listen_https_port: Option, scheduling_policy: SkSchedulingPolicy, id: NodeId, availability: SafekeeperState, + + // Flag from storcon's config to use https for safekeeper API. + // Invariant: if |true|, listen_https_port should contain a value. 
+ use_https: bool, } impl Safekeeper { - pub(crate) fn from_persistence(skp: SafekeeperPersistence, cancel: CancellationToken) -> Self { + pub(crate) fn from_persistence( + skp: SafekeeperPersistence, + cancel: CancellationToken, + use_https: bool, + ) -> anyhow::Result { + if use_https && skp.https_port.is_none() { + anyhow::bail!( + "cannot load safekeeper {} from persistence: \ + https is enabled, but https port is not specified", + skp.id, + ); + } + let scheduling_policy = skp.scheduling_policy.0; - Self { + Ok(Self { cancel, listen_http_addr: skp.host.clone(), listen_http_port: skp.http_port as u16, + listen_https_port: skp.https_port.map(|x| x as u16), id: NodeId(skp.id as u64), skp, availability: SafekeeperState::Offline, scheduling_policy, - } + use_https, + }) } + pub(crate) fn base_url(&self) -> String { - format!("http://{}:{}", self.listen_http_addr, self.listen_http_port) + if self.use_https { + format!( + "https://{}:{}", + self.listen_http_addr, + self.listen_https_port + .expect("https port should be specified if use_https is on"), + ) + } else { + format!("http://{}:{}", self.listen_http_addr, self.listen_http_port) + } } pub(crate) fn get_id(&self) -> NodeId { @@ -57,10 +86,12 @@ impl Safekeeper { self.skp.scheduling_policy = scheduling_policy.into(); } /// Perform an operation (which is given a [`SafekeeperClient`]) with retries + #[allow(clippy::too_many_arguments)] pub(crate) async fn with_client_retries( &self, mut op: O, jwt: &Option, + ssl_ca_cert: &Option, warn_threshold: u32, max_retries: u32, timeout: Duration, @@ -79,19 +110,22 @@ impl Safekeeper { | ApiError(StatusCode::REQUEST_TIMEOUT, _) => false, ApiError(_, _) => true, Cancelled => true, + CreateClient(_) => true, } } + // TODO: refactor SafekeeperClient and with_client_retires (#11113). 
+ let mut http_client = reqwest::Client::builder().timeout(timeout); + if let Some(ssl_ca_cert) = ssl_ca_cert.as_ref() { + http_client = http_client.add_root_certificate(ssl_ca_cert.clone()); + } + let http_client = http_client.build().map_err(mgmt_api::Error::CreateClient)?; + backoff::retry( || { - let http_client = reqwest::ClientBuilder::new() - .timeout(timeout) - .build() - .expect("Failed to construct HTTP client"); - - let client = SafekeeperClient::from_client( + let client = SafekeeperClient::new( self.get_id(), - http_client, + http_client.clone(), self.base_url(), jwt.clone(), ); @@ -112,8 +146,9 @@ impl Safekeeper { warn_threshold, max_retries, &format!( - "Call to safekeeper {} ({}:{}) management API", - self.id, self.listen_http_addr, self.listen_http_port + "Call to safekeeper {} ({}) management API", + self.id, + self.base_url(), ), cancel, ) @@ -121,12 +156,16 @@ impl Safekeeper { .unwrap_or(Err(mgmt_api::Error::Cancelled)) } - pub(crate) fn update_from_record(&mut self, record: crate::persistence::SafekeeperUpsert) { + pub(crate) fn update_from_record( + &mut self, + record: crate::persistence::SafekeeperUpsert, + ) -> anyhow::Result<()> { let crate::persistence::SafekeeperUpsert { active: _, availability_zone_id: _, host, http_port, + https_port, id, port: _, region_id: _, @@ -139,9 +178,17 @@ impl Safekeeper { self.id.0 ); } + if self.use_https && https_port.is_none() { + anyhow::bail!( + "cannot update safekeeper {id}: \ + https is enabled, but https port is not specified" + ); + } self.skp = crate::persistence::SafekeeperPersistence::from_upsert(record, self.scheduling_policy); self.listen_http_port = http_port as u16; + self.listen_https_port = https_port.map(|x| x as u16); self.listen_http_addr = host; + Ok(()) } } diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs index fb5be092a0..662f6d43be 100644 --- a/storage_controller/src/safekeeper_client.rs +++ 
b/storage_controller/src/safekeeper_client.rs @@ -45,26 +45,14 @@ macro_rules! measured_request { } impl SafekeeperClient { - #[allow(dead_code)] pub(crate) fn new( - node_id: NodeId, - mgmt_api_endpoint: String, - jwt: Option, - ) -> Self { - Self { - inner: Client::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt), - node_id_label: node_id.0.to_string(), - } - } - - pub(crate) fn from_client( node_id: NodeId, raw_client: reqwest::Client, mgmt_api_endpoint: String, jwt: Option, ) -> Self { Self { - inner: Client::from_client(raw_client, mgmt_api_endpoint, jwt), + inner: Client::new(raw_client, mgmt_api_endpoint, jwt), node_id_label: node_id.0.to_string(), } } diff --git a/storage_controller/src/schema.rs b/storage_controller/src/schema.rs index 361253bd19..ebfe630173 100644 --- a/storage_controller/src/schema.rs +++ b/storage_controller/src/schema.rs @@ -40,6 +40,7 @@ diesel::table! { http_port -> Int4, availability_zone_id -> Text, scheduling_policy -> Varchar, + https_port -> Nullable, } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 6795abf6e9..d8c9ee70b1 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -48,7 +48,7 @@ use pageserver_api::upcall_api::{ ValidateResponseTenant, }; use pageserver_client::{BlockUnblock, mgmt_api}; -use reqwest::StatusCode; +use reqwest::{Certificate, StatusCode}; use safekeeper_api::models::SafekeeperUtilization; use tokio::sync::TryAcquireError; use tokio::sync::mpsc::error::TrySendError; @@ -400,6 +400,9 @@ pub struct Config { pub long_reconcile_threshold: Duration, pub use_https_pageserver_api: bool, + pub use_https_safekeeper_api: bool, + + pub ssl_ca_cert: Option, } impl From for ApiError { @@ -1420,8 +1423,14 @@ impl Service { .list_safekeepers() .await? 
.into_iter() - .map(|skp| Safekeeper::from_persistence(skp, CancellationToken::new())) - .collect::>(); + .map(|skp| { + Safekeeper::from_persistence( + skp, + CancellationToken::new(), + config.use_https_safekeeper_api, + ) + }) + .collect::>>()?; let safekeepers: HashMap = safekeepers.into_iter().map(|n| (n.get_id(), n)).collect(); tracing::info!("Loaded {} safekeepers from database.", safekeepers.len()); @@ -1559,6 +1568,7 @@ impl Service { let heartbeater_ps = Heartbeater::new( config.pageserver_jwt_token.clone(), + config.ssl_ca_cert.clone(), config.max_offline_interval, config.max_warming_up_interval, cancel.clone(), @@ -1566,6 +1576,7 @@ impl Service { let heartbeater_sk = Heartbeater::new( config.safekeeper_jwt_token.clone(), + config.ssl_ca_cert.clone(), config.max_offline_interval, config.max_warming_up_interval, cancel.clone(), @@ -8227,24 +8238,41 @@ impl Service { pub(crate) async fn upsert_safekeeper( &self, record: crate::persistence::SafekeeperUpsert, - ) -> Result<(), DatabaseError> { + ) -> Result<(), ApiError> { let node_id = NodeId(record.id as u64); + let use_https = self.config.use_https_safekeeper_api; + + if use_https && record.https_port.is_none() { + return Err(ApiError::PreconditionFailed( + format!( + "cannot upsert safekeeper {node_id}: \ + https is enabled, but https port is not specified" + ) + .into(), + )); + } + self.persistence.safekeeper_upsert(record.clone()).await?; { let mut locked = self.inner.write().unwrap(); let mut safekeepers = (*locked.safekeepers).clone(); match safekeepers.entry(node_id) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - entry.get_mut().update_from_record(record); - } + std::collections::hash_map::Entry::Occupied(mut entry) => entry + .get_mut() + .update_from_record(record) + .expect("all preconditions should be checked before upsert to database"), std::collections::hash_map::Entry::Vacant(entry) => { - entry.insert(Safekeeper::from_persistence( - 
crate::persistence::SafekeeperPersistence::from_upsert( - record, - SkSchedulingPolicy::Pause, - ), - CancellationToken::new(), - )); + entry.insert( + Safekeeper::from_persistence( + crate::persistence::SafekeeperPersistence::from_upsert( + record, + SkSchedulingPolicy::Pause, + ), + CancellationToken::new(), + use_https, + ) + .expect("all preconditions should be checked before upsert to database"), + ); } } locked.safekeepers = Arc::new(safekeepers); diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index b5572ce6a1..29919f2fe7 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3226,6 +3226,7 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder): "host": "localhost", "port": sk_0.port.pg, "http_port": sk_0.port.http, + "https_port": None, "version": 5957, "availability_zone_id": "us-east-2b", } @@ -3260,6 +3261,24 @@ def test_safekeeper_deployment_time_update(neon_env_builder: NeonEnvBuilder): assert eq_safekeeper_records(body, inserted_now) + # https_port appears during migration + body["https_port"] = 123 + target.on_safekeeper_deploy(fake_id, body) + inserted_now = target.get_safekeeper(fake_id) + assert target.get_safekeepers() == [inserted_now] + assert inserted_now is not None + assert eq_safekeeper_records(body, inserted_now) + env.storage_controller.consistency_check() + + # https_port rollback + body["https_port"] = None + target.on_safekeeper_deploy(fake_id, body) + inserted_now = target.get_safekeeper(fake_id) + assert target.get_safekeepers() == [inserted_now] + assert inserted_now is not None + assert eq_safekeeper_records(body, inserted_now) + env.storage_controller.consistency_check() + # some small tests for the scheduling policy querying and returning APIs newest_info = target.get_safekeeper(inserted["id"]) assert newest_info @@ -3792,6 +3811,7 @@ def test_storage_controller_node_flap_detach_race( 
wait_until(validate_locations, timeout=10) +@run_only_on_default_postgres("this is like a 'unit test' against storcon db") def test_update_node_on_registration(neon_env_builder: NeonEnvBuilder): """ Check that storage controller handles node_register requests with updated fields correctly. From cd438406fb5fbaeb02c4edf6cb10c4349127bfb3 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Fri, 7 Mar 2025 12:42:52 -0500 Subject: [PATCH 126/207] feat(pageserver): add force patch index_part API (#11119) ## Problem As part of the disaster recovery tool. Partly for https://github.com/neondatabase/neon/issues/9114. ## Summary of changes * Add a new pageserver API to force patch the fields in index_part and modify the timeline internal structures. --------- Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/models.rs | 9 +++ pageserver/src/http/routes.rs | 77 ++++++++++++++++++- .../src/tenant/remote_timeline_client.rs | 8 ++ test_runner/fixtures/pageserver/http.py | 13 ++++ test_runner/regress/test_pageserver_api.py | 24 +++++- 5 files changed, 129 insertions(+), 2 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index fabfe28aa2..749a8acc4e 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -1146,6 +1146,15 @@ pub struct TimelineArchivalConfigRequest { pub state: TimelineArchivalState, } +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone)] +pub struct TimelinePatchIndexPartRequest { + pub rel_size_migration: Option, + pub gc_compaction_last_completed_lsn: Option, + pub applied_gc_cutoff_lsn: Option, + #[serde(default)] + pub force_index_update: bool, +} + #[derive(Debug, Serialize, Deserialize, Clone)] pub struct TimelinesInfoAndOffloaded { pub timelines: Vec, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index cd79aa6680..3c0c23a56d 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ 
-37,7 +37,8 @@ use pageserver_api::models::{ TenantShardSplitResponse, TenantSorting, TenantState, TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateRequestMode, TimelineCreateRequestModeImportPgdata, TimelineGcRequest, TimelineInfo, - TimelinesInfoAndOffloaded, TopTenantShardItem, TopTenantShardsRequest, TopTenantShardsResponse, + TimelinePatchIndexPartRequest, TimelinesInfoAndOffloaded, TopTenantShardItem, + TopTenantShardsRequest, TopTenantShardsResponse, }; use pageserver_api::shard::{ShardCount, TenantShardId}; use remote_storage::{DownloadError, GenericRemoteStorage, TimeTravelError}; @@ -63,6 +64,7 @@ use crate::tenant::mgr::{ GetActiveTenantError, GetTenantError, TenantManager, TenantMapError, TenantMapInsertError, TenantSlot, TenantSlotError, TenantSlotUpsertError, TenantStateError, UpsertLocationError, }; +use crate::tenant::remote_timeline_client::index::GcCompactionState; use crate::tenant::remote_timeline_client::{ download_index_part, list_remote_tenant_shards, list_remote_timelines, }; @@ -858,6 +860,75 @@ async fn timeline_archival_config_handler( json_response(StatusCode::OK, ()) } +/// This API is used to patch the index part of a timeline. You must ensure such patches are safe to apply. Use this API as an emergency +/// measure only. +/// +/// Some examples of safe patches: +/// - Increase the gc_cutoff and gc_compaction_cutoff to a larger value in case of a bug that didn't bump the cutoff and cause read errors. +/// - Force set the index part to use reldir v2 (migrating/migrated). +/// +/// Some examples of unsafe patches: +/// - Force set the index part from v2 to v1 (legacy). This will cause the code path to ignore anything written to the new keyspace and cause +/// errors. +/// - Decrease the gc_cutoff without validating the data really exists. It will cause read errors in the background. 
+async fn timeline_patch_index_part_handler( + mut request: Request, + _cancel: CancellationToken, +) -> Result, ApiError> { + let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; + let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + + let request_data: TimelinePatchIndexPartRequest = json_request(&mut request).await?; + check_permission(&request, None)?; // require global permission for this request + let state = get_state(&request); + + async { + let timeline = + active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) + .await?; + + if let Some(rel_size_migration) = request_data.rel_size_migration { + timeline + .update_rel_size_v2_status(rel_size_migration) + .map_err(ApiError::InternalServerError)?; + } + + if let Some(gc_compaction_last_completed_lsn) = + request_data.gc_compaction_last_completed_lsn + { + timeline + .update_gc_compaction_state(GcCompactionState { + last_completed_lsn: gc_compaction_last_completed_lsn, + }) + .map_err(ApiError::InternalServerError)?; + } + + if let Some(applied_gc_cutoff_lsn) = request_data.applied_gc_cutoff_lsn { + { + let guard = timeline.applied_gc_cutoff_lsn.lock_for_write(); + guard.store_and_unlock(applied_gc_cutoff_lsn); + } + } + + if request_data.force_index_update { + timeline + .remote_client + .force_schedule_index_upload() + .context("force schedule index upload") + .map_err(ApiError::InternalServerError)?; + } + + Ok::<_, ApiError>(()) + } + .instrument(info_span!("timeline_patch_index_part", + tenant_id = %tenant_shard_id.tenant_id, + shard_id = %tenant_shard_id.shard_slug(), + %timeline_id)) + .await?; + + json_response(StatusCode::OK, ()) +} + async fn timeline_detail_handler( request: Request, _cancel: CancellationToken, @@ -3629,6 +3700,10 @@ pub fn make_router( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/get_timestamp_of_lsn", |r| api_handler(r, get_timestamp_of_lsn_handler), ) + .post( + 
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/patch_index_part", + |r| api_handler(r, timeline_patch_index_part_handler), + ) .post( "/v1/tenant/:tenant_shard_id/timeline/:timeline_id/lsn_lease", |r| api_handler(r, lsn_lease_handler), diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index a784a05972..891760b499 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -954,6 +954,14 @@ impl RemoteTimelineClient { Ok(()) } + /// Only used in the `patch_index_part` HTTP API to force trigger an index upload. + pub fn force_schedule_index_upload(self: &Arc) -> Result<(), NotInitialized> { + let mut guard = self.upload_queue.lock().unwrap(); + let upload_queue = guard.initialized_mut()?; + self.schedule_index_upload(upload_queue); + Ok(()) + } + /// Launch an index-file upload operation in the background (internal function) fn schedule_index_upload(self: &Arc, upload_queue: &mut UploadQueueInitialized) { let disk_consistent_lsn = upload_queue.dirty.metadata.disk_consistent_lsn(); diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 364aff325d..0efe0b9575 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -375,6 +375,19 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/reset", params=params) self.verbose_error(res) + def timeline_patch_index_part( + self, + tenant_id: TenantId | TenantShardId, + timeline_id: TimelineId, + data: dict[str, Any], + ): + res = self.post( + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/patch_index_part", + json=data, + ) + self.verbose_error(res) + return res.json() + def tenant_location_conf( self, tenant_id: TenantId | TenantShardId, diff --git a/test_runner/regress/test_pageserver_api.py 
b/test_runner/regress/test_pageserver_api.py index 55fd7a8608..17ffeca23b 100644 --- a/test_runner/regress/test_pageserver_api.py +++ b/test_runner/regress/test_pageserver_api.py @@ -7,7 +7,7 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, ) from fixtures.pageserver.http import PageserverHttpClient -from fixtures.utils import wait_until +from fixtures.utils import run_only_on_default_postgres, wait_until def check_client(env: NeonEnv, client: PageserverHttpClient): @@ -138,3 +138,25 @@ def test_pageserver_http_api_client_auth_enabled(neon_env_builder: NeonEnvBuilde with env.pageserver.http_client(auth_token=pageserver_token) as client: check_client(env, client) + + +@run_only_on_default_postgres("it does not use any postgres functionality") +def test_pageserver_http_index_part_force_patch(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + with env.pageserver.http_client() as client: + client.timeline_patch_index_part( + tenant_id, + timeline_id, + {"rel_size_migration": "migrating"}, + ) + assert client.timeline_detail(tenant_id, timeline_id)["rel_size_migration"] == "migrating" + # This is invalid in practice: we should never rollback the migrating state to legacy. + # But we do it here to test the API. + client.timeline_patch_index_part( + tenant_id, + timeline_id, + {"rel_size_migration": "legacy"}, + ) + assert client.timeline_detail(tenant_id, timeline_id)["rel_size_migration"] == "legacy" From 3fe56500396c0c4712d171319bea5195be2da59e Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Fri, 7 Mar 2025 13:00:11 -0600 Subject: [PATCH 127/207] Fix dropping role with table privileges granted by non-neon_superuser (#10964) We were previously only revoking privileges granted by neon_superuser. However, we need to do it for all grantors. 
Signed-off-by: Tristan Partin --- .../sql/pre_drop_role_revoke_privileges.sql | 30 +++++--- test_runner/regress/test_compute_catalog.py | 71 ++++++++++++++++++- 2 files changed, 88 insertions(+), 13 deletions(-) diff --git a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql index 4342650591..734607be02 100644 --- a/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql +++ b/compute_tools/src/sql/pre_drop_role_revoke_privileges.sql @@ -1,8 +1,7 @@ -SET SESSION ROLE neon_superuser; - DO ${outer_tag}$ DECLARE schema TEXT; + grantor TEXT; revoke_query TEXT; BEGIN FOR schema IN @@ -15,16 +14,25 @@ BEGIN -- ii) it's easy to add more schemas to the list if needed. WHERE schema_name IN ('public') LOOP - revoke_query := format( - 'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY neon_superuser;', - schema, - -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()` - {role_name} - ); + FOR grantor IN EXECUTE + format( + 'SELECT DISTINCT rtg.grantor FROM information_schema.role_table_grants AS rtg WHERE grantee = %s', + -- N.B. this has to be properly dollar-escaped with `pg_quote_dollar()` + quote_literal({role_name}) + ) + LOOP + EXECUTE format('SET LOCAL ROLE %I', grantor); - EXECUTE revoke_query; + revoke_query := format( + 'REVOKE ALL PRIVILEGES ON ALL TABLES IN SCHEMA %I FROM %I GRANTED BY %I', + schema, + -- N.B. 
this has to be properly dollar-escaped with `pg_quote_dollar()` + {role_name}, + grantor + ); + + EXECUTE revoke_query; + END LOOP; END LOOP; END; ${outer_tag}$; - -RESET ROLE; diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index ce655d22b5..2e7da86d9d 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -323,10 +323,12 @@ def test_dropdb_with_subscription(neon_simple_env: NeonEnv): assert curr_db[0] == PUB_DB_NAME -def test_compute_drop_role(neon_simple_env: NeonEnv): +def test_drop_role_with_table_privileges_from_neon_superuser(neon_simple_env: NeonEnv): """ Test that compute_ctl can drop a role even if it has some depending objects - like permissions in one of the databases. + like permissions in one of the databases that were granted by + neon_superuser. + Reproduction test for https://github.com/neondatabase/cloud/issues/13582 """ env = neon_simple_env @@ -442,3 +444,68 @@ def test_compute_drop_role(neon_simple_env: NeonEnv): cursor.execute("SELECT rolname FROM pg_roles WHERE rolname = 'readonly2'") role = cursor.fetchone() assert role is None + + +def test_drop_role_with_table_privileges_from_non_neon_superuser(neon_simple_env: NeonEnv): + """ + Test that compute_ctl can drop a role if the role has previously been + granted table privileges by a role other than neon_superuser. + """ + TEST_DB_NAME = "neondb" + TEST_GRANTOR = "; RAISE EXCEPTION 'SQL injection detected;" + TEST_GRANTEE = "'$$; RAISE EXCEPTION 'SQL injection detected;'" + + env = neon_simple_env + + endpoint = env.endpoints.create_start("main") + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "cluster": { + "roles": [ + { + # We need to create role via compute_ctl, because in this case it will receive + # additional grants equivalent to our real environment, so we can repro some + # issues. 
+ "name": TEST_GRANTOR, + # Some autocomplete-suggested hash, no specific meaning. + "encrypted_password": "SCRAM-SHA-256$4096:hBT22QjqpydQWqEulorfXA==$miBogcoj68JWYdsNB5PW1X6PjSLBEcNuctuhtGkb4PY=:hxk2gxkwxGo6P7GCtfpMlhA9zwHvPMsCz+NQf2HfvWk=", + "options": [], + }, + ], + "databases": [ + { + "name": TEST_DB_NAME, + "owner": TEST_GRANTOR, + }, + ], + }, + } + ) + + endpoint.reconfigure() + + with endpoint.cursor(dbname=TEST_DB_NAME, user=TEST_GRANTOR) as cursor: + cursor.execute(f'CREATE USER "{TEST_GRANTEE}"') + cursor.execute("CREATE TABLE test_table(id bigint)") + cursor.execute(f'GRANT ALL ON TABLE test_table TO "{TEST_GRANTEE}"') + + endpoint.respec_deep( + **{ + "skip_pg_catalog_updates": False, + "delta_operations": [ + { + "action": "delete_role", + "name": TEST_GRANTEE, + }, + ], + } + ) + endpoint.reconfigure() + + with endpoint.cursor() as cursor: + cursor.execute( + "SELECT rolname FROM pg_roles WHERE rolname = %(role)s", {"role": TEST_GRANTEE} + ) + role = cursor.fetchone() + assert role is None From 1b8c4286c416e9a883eb9804a4826cdd0dbf28d3 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Sun, 9 Mar 2025 12:29:44 -0500 Subject: [PATCH 128/207] Fetch remote extension in ALTER EXTENSION UPDATE statements (#11102) Previously, remote extensions were not fetched unless they were used in some other manner. For instance, loading a BM25 index in pg_search fetches the pg_search extension. However, if on a fresh compute with pg_search 0.15.5 installed, the user ran `ALTER EXTENSION pg_search UPDATE TO '0.15.6'` without first using the pg_search extension, we would not fetch the extension and fail to find an update path. 
Signed-off-by: Tristan Partin --- compute_tools/src/extension_server.rs | 18 ++- .../5670669815/v14/ext_index.json | 24 --- .../5670669815/v14/extensions/anon.tar.zst | Bin 324259 -> 0 bytes .../v14/extensions/pg_buffercache.tar.zst | Bin 130807 -> 0 bytes .../5670669815/v15/ext_index.json | 17 -- .../5670669815/v15/extensions/anon.tar.zst | Bin 317163 -> 0 bytes .../5670669815/v16/ext_index.json | 7 - .../5670669815/v17/ext_index.json | 7 - .../sql/test_extension--1.0--1.1.sql | 10 ++ .../sql/test_extension--1.0.sql | 12 ++ .../test_extension/test_extension.control | 1 + .../regress/test_download_extensions.py | 145 ++++++++++++------ vendor/postgres-v14 | 2 +- vendor/postgres-v15 | 2 +- vendor/postgres-v16 | 2 +- vendor/postgres-v17 | 2 +- vendor/revisions.json | 8 +- 17 files changed, 142 insertions(+), 115 deletions(-) delete mode 100644 test_runner/regress/data/extension_test/5670669815/v14/ext_index.json delete mode 100644 test_runner/regress/data/extension_test/5670669815/v14/extensions/anon.tar.zst delete mode 100644 test_runner/regress/data/extension_test/5670669815/v14/extensions/pg_buffercache.tar.zst delete mode 100644 test_runner/regress/data/extension_test/5670669815/v15/ext_index.json delete mode 100644 test_runner/regress/data/extension_test/5670669815/v15/extensions/anon.tar.zst delete mode 100644 test_runner/regress/data/extension_test/5670669815/v16/ext_index.json delete mode 100644 test_runner/regress/data/extension_test/5670669815/v17/ext_index.json create mode 100644 test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0--1.1.sql create mode 100644 test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0.sql create mode 100644 test_runner/regress/data/test_remote_extensions/test_extension/test_extension.control diff --git a/compute_tools/src/extension_server.rs b/compute_tools/src/extension_server.rs index b4de786b00..ee889e0c40 100644 --- a/compute_tools/src/extension_server.rs +++ 
b/compute_tools/src/extension_server.rs @@ -202,8 +202,24 @@ pub async fn download_extension( // move contents of the libdir / sharedir in unzipped archive to the correct local paths for paths in [sharedir_paths, libdir_paths] { let (zip_dir, real_dir) = paths; + + let dir = match std::fs::read_dir(&zip_dir) { + Ok(dir) => dir, + Err(e) => match e.kind() { + // In the event of a SQL-only extension, there would be nothing + // to move from the lib/ directory, so note that in the log and + // move on. + std::io::ErrorKind::NotFound => { + info!("nothing to move from {}", zip_dir); + continue; + } + _ => return Err(anyhow::anyhow!(e)), + }, + }; + info!("mv {zip_dir:?}/* {real_dir:?}"); - for file in std::fs::read_dir(zip_dir)? { + + for file in dir { let old_file = file?.path(); let new_file = Path::new(&real_dir).join(old_file.file_name().context("error parsing file")?); diff --git a/test_runner/regress/data/extension_test/5670669815/v14/ext_index.json b/test_runner/regress/data/extension_test/5670669815/v14/ext_index.json deleted file mode 100644 index af49dfa0c0..0000000000 --- a/test_runner/regress/data/extension_test/5670669815/v14/ext_index.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "public_extensions": [ - "anon", - "pg_buffercache" - ], - "library_index": { - "anon": "anon", - "pg_buffercache": "pg_buffercache" - }, - "extension_data": { - "pg_buffercache": { - "control_data": { - "pg_buffercache.control": "# pg_buffercache extension \ncomment = 'examine the shared buffer cache' \ndefault_version = '1.3' \nmodule_pathname = '$libdir/pg_buffercache' \nrelocatable = true \ntrusted=true" - }, - "archive_path": "5670669815/v14/extensions/pg_buffercache.tar.zst" - }, - "anon": { - "control_data": { - "anon.control": "# PostgreSQL Anonymizer (anon) extension \ncomment = 'Data anonymization tools' \ndefault_version = '1.1.0' \ndirectory='extension/anon' \nrelocatable = false \nrequires = 'pgcrypto' \nsuperuser = false \nmodule_pathname = '$libdir/anon' \ntrusted = 
true \n" - }, - "archive_path": "5670669815/v14/extensions/anon.tar.zst" - } - } -} \ No newline at end of file diff --git a/test_runner/regress/data/extension_test/5670669815/v14/extensions/anon.tar.zst b/test_runner/regress/data/extension_test/5670669815/v14/extensions/anon.tar.zst deleted file mode 100644 index 5c17630109e66149f559d75866478be70a3037c5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 324259 zcmX7PbzD>5|Nk~>^nk(W?rv#Dhm?v)irzV12C%ik(?i*J78K{&0ZYsh2*36dF7nCz9>l1(>sq-mpj zGxWk|`zi$zhE|W#rzdwG>+j_6$GKXe8GFpSF|2npBKUO3N+srW;{QmG$OTaKo zndGLm7DtVBwkcO0h?j+&xt4*(`&ET`b|pY-y0+s@2 z;ksPCs$R4lVX?;$6i0cT_q_o}IpyXm1u57S58YLc5=Dk4G0o$jq+Lg%SdngD$vA9c zk0sDC{t$#0xvQY{cRS)E0Zf&ghVJjnszH2?o}GL`JFqLu5<*vlG5%PtJjQ?O4;jVL z%55=ZzDbd!9x~tPCgiDZNwOrS79%~S$_!E09jO0K8*_fl#>f|QK6#7bEVo%Q&{fmD zKcS>Ei}8ontLo^sV_!1N%VWXuRP%CJUpqD3iJP5=VAsWU05=DVUm*c(UVOd)>Z`=}a$8X-&;CNXIBA-Cr+vu0 zZ2kIXG^*a`0i1@zaAy_YMt25HR|e*|^$kPH#bhcEY$h;{ zu+Fqmu^jqf|Hl8I7+lU_<9iTE$5A*S1APsd7p>o6flTw9_qmGdS8j0C8&hV1HEC90_AVKFMu>0A~DC@=P}ZNoA*dz zbp`NJtGO=TmXTnrH2J+hJ8!uL_LVMNH`=hvBv@NKH2l{hounTnpoHLVN^4uxWvh(z*a4konafQeChsqRK+O zdZd*h4)Ci_*@wSOdWZ;3`5&cA_L>E+4@f7tdo1j@{ zjBx{KtZ)5|JaT)}5-f|{+$;uTfain=ViaJP@E%PAXzeQmSs|A7Qkwhn*a)K>CF~JR3d!&a+n-K zcf%D4^(R133&71zj8Xi^kDJBZDuA*mHztV^0ko(Z2{*x#ra1sguq0-MEZG!|R7MtU z7W3Bwe4`>V_#^W-?=SD@@1Xk&I@2|Ic?k{k{{F`mFLVM9Cj<8Su|xZ|BgMihAi ztBJ6dP{1JG1sy9ga8n_P66wDws)27pVI8f^&U`r%3+#;Sq8bJ?MMgM^mM~uGd_lTw zvX&E;FrazMh`8}Cq~4X#qj@9g;o>L8zzsl1q#|Ppz&EmsOc`mpsX*xgXzW#Op`9hc zq<|w$H}grU0TsP}SU>>v$YU`i&>wUUNykSK-{3`2nGjne;6&`yXd*RGO7HD3E1c6? 
ztPjkOe<$$`0E!I#NH9x;LBs)qd;c)Wnh;@nnAj=Mo=<_RNVjJM^T=ZqCsJ~gf{PeJ zc;lCf6y0Q_g21iN2s=D>QnXts=xOgCN^>OtW?)tA1*Qk zF;35rQRqNCa=0n_ZWLg+RYX9648r($M+IV{#X+dlP(3I+JJvcAVCt6xq;v_qeGJO5 zS=$1aEQdJ~vI}5xW6FG0sA5g1+U4ZGPwYt??wtJ2!BxjbvDv$Ng+th1YPtEu!^mR zvIEgrS2bgFDgthV_KGAlp{alA4;+k%u;fIgbR80vFkTpwl=&2IgJ+W?xFH0y6Edu< zfXC|*U=V-6I#2;=a$oWqVA)?704l6^U>-fEd#(**oGNvn^{ z^nMN^gujodg#cSI7AgSyj_)M|br>r<0QLGW6GD?iqc=b#9Kr33bhPFJ5*_+2^+HOx zF&}zCKejC6MUX-09NiN73$Dx(mb<=m(2;je3NR+-7={n%Lys&19(UNPlZE))ZWHkD z6=~A2XDS*ID|BS{Q^R@hUD(0wGpd$BfQ5etLclCcSTbDR`a6;To^d!#M=zj`1H8GmA&OaKKS9xwx?VRGZSlUzLAxCr+Xz~DAE3V$}y?H!};P%Dcw z^0Bo>DLy+Ck;jj;NAJi13(;16grkE2q_5cN>p4mYOcIW1Igqw!?LHznH&zV=bQ%1k zOTtcd!4?ANz;aTZpQarV%-4TIf_yL7c}8ut$OY5}y%F^3u==i(I5vz{38d_a_qHu( zac+rb@#dharU77RT`LZtk+V5y6AvjtCmp`fO4@WB>d{Qy!Up7)%6M;k?hGY##gn~S z7WWZ}W#+H9yv~E&%S?i9K7BaG}-_=M_`;Hm9M!EY^6+-MBMD=tyax8hZ;ti zT>K{X7;B5%667>OcR}sYUGbq&1oZX4*$MfK>Hz}*G^)Agz_?IW*3IZHu)I0}y^s`U zomA4_4y{VVTI`m@qeB@Vx5RCQ$5Lx4lHFHx>r;{;h?ygac|wpJF~?xA+76mp42Ot)9J3vI z!^lh%qj9)7z+`vH7f(_98l&Y{jRwZKNsP7t<04F(X)k8dGGuzqPtz!Q1+m3|hHmfs zOOx%8c<8A2eXre-fR6|fmm~UEuHjwj;GZNWNlXCD+!lQ-kcno+gGyp#sLgHV61Ah; zMD%ZV>iz80qp9x=;1ojGdUA2eXzHb4^gv=)rJ5d@zM1utNKLJH7*_E;6(R*cWrmKs zUZ9~cD6y+ZixIxP3Q{$}tUoe2djUwehWNmJmfXkqNU5ooL}=~N2z-bDv5%p?nkM&P zD}~F6JR=Eb+MWG5V8 zs--98c7yAP*8ugwTVg?aA4DJ_In@Q|r`=#rfx073+&ZiEz$IAUw!r|yLWG?L(qjX~ zeJC=!d#ScV5^0hzZ`!yxxv@rEgLK<>_|RmuClaQu!hsEs^Nc~m*onBXfnbg~4(uyr zf0A+h@mqF%f8B)mP+I6o8Ta5H#zpbR)Z5r-#%>6)4}u;-r502xqk8Ali+3aSTb=B} zD^7GJV}N7e}_jt1EV(Wed`aELqcXZ2@&n5u5ewDW-b3e z__&@qa)!8uTg#be?>+8@L}H^cqQ{w##_cDcQTVgE|5V7$_0o@yT%#eFKSp}`kX*{V z(DrD(c`yoHH~K&?J%vn#>R}v zn^?ZXBfSA~Ob-BOtel#G4jAB1Epd^~f~YE83HlEf{)|nYG|5f8Ho)tZbn*Jr4W$PK zL~S5fhgHhp!hgfTR>gah9%MUcHSfL}BgxdYu>J2_q^hS|IV?OP z+#=|XPFHb21+iss-lnYgS^wXg?oN#I@bHQ7af1lNT+!+|tE|p}{L_|yCZ-SPcrvQo zUVjsA&sm{c>=d7>Xl=GC=$T0S-PXt`^i?I!NkZlr7$kR^Wr`U4&fk84AJg2~a*kwrh5vT^_#4&zZYiqSVVW^td54JFH?7qJB7s&Xot zp(d@_OR5eiyy3{iF6XC&C2Cy6gf03RcJK*?SxddLa#C5xxqOYKuTO~h{rlpCEgeUr 
zeja_mqqnyMRA^0j#6$XQ2L0G$goh;@=SZSxzS9_@lh6>d+3m8g>PXEJiJGlM?vRk5 zkgo(EllTC5mQ2=ov^YyYj(Gd0(_A4j;sz3-EXij+qG zr$C0P&17dg5ymNF0tD4oMA{8KBondpK&CK~PQ}u)jo&ER+HdA|p!+E-TJ~ps<`PT} zZ5QA}Z`svjNu^+57g~-aGvgT+cbs@m4c9sS55Z|5^v*2R0Mg7jjm39X%Q%hIw?vLz zJ86-H3<-*)7b1blxto}H6S0*1WY?xh711`;ZeiE5pR|wR^X*M1;_f`*H)}x6k8hUI zlpglK*R|jn!aJa5s7{bS0*wRtEnAQ%f}=<1Ya*iJ&~ZlGz0bjBEg>N%kXTVn>p?j) z`+f{5e->N#Y}sBoKCMZMOE45Gz(_wA<$poN{2DiLWr^Tqm$@?`QxkDcQgyns*(o3n z2%JZI(EwV}^7F~jKDwY>yadw6_6@rw;NRr5nUwN}M=Jx*OZy2x`sQSZM{{^gkdkfD zs5p8HNYVdJsURh|YatS`#)yc+~|$rf76o6(6O6fBO4<&ok;BtPEOx$cCF- z!gsXnPZUrLAvQoO=m&?Bf}TN2w>giGj!OHQzpu9hUNsw+OaOmdrd@KXqPc_^xb&^i+ z=Z$#e0BX2|Ml#(-Y?m&z+RsR`>TG9919}ZgCa(RjAbtU+__?yyIAOxsp^;KF+}PN- zmJ~t9Fo?6_qJR}Wk=sw{2($Lb3hLmxejFW*slirfgkH~n{mO-#-&-QK$#(>@QP)~rS!K%%LXvGTphAH?bjkXje{(gP{KNMv?sst69* zrx}DomMU;NLOMy3lBT!n8e<8-AwBZs=JG$B#MIw(^&GEnlDJ(O7G6}4^V*=!Pt>Vl z$S9qJnM**L$juJLd-Af%*y9<)<9+W6l#h!%hL*5P-o7Jy;E`xiQoCASx?}J-LAH(E zgSzA26pCI!8?4Q~-SchJjMv@$n4oN%IFFzW+=bBOTcDL|sF~GPUWx~E{yp*_C|lXt zK?EXK=>dy#a`xVkE&9r!G$DfuvtnSn!hHVaT)1{Ix$SQLx2-8f!*AK@t%rG1^R(>r zk-YD9Ta%nKi%!vy#IZYB9PTIq!yT{ZoyE;;Uz`eiG3SreS|?dp8U_X?F4}xIYg=Mj zc3v-d>hye>z~PaCu9`Bvie@Ik(dBHG$xpP*Y(0r9*~sKtGPUYitN-iDkyy}t3vPmczMi20m_m}bttc(`XfEysqXtPD@!Ccl12 zC2q%@jC%K54#UTzf**d(o|r@z;RTdFumlFBEm_93^Fg@2-N3gpycQ?ehdxDSrsV~CqqDj9`ZoIm-Yvmi-+d5~4IkWe-&fONoee(8JfBM;Zq)s3A z{zu7}_9lTw?6~)|sIhsH>%Tmu0jU(exUJc{=v=u|F%2Mrl4{LwwWu4K@AlLV-_3@H zZ5LIhe~hcNJ$~`tDJk8syQ;BSc*(#(?kxk8+n3>X?*ekUzcQcqV!z7_+pPU_a;Ouw zeeS$&X}$ABD{!UY8_(&ahjnzL??iVen}ch!rEnYxlf+aEnFiuEcPj7MyH6Yu25UuR z126rYb3C0AdATzZ47MZZUa8t0PtC|&I^VK+9XFl5)#cT?xoK*~jEj+l{vF=;<0#E8 zPRz&EHoghx@ASB7sK*?g5)}ha;#Zc(+cw;M8egan5fNwL)%5uZ(R7T8+=Qp$zbdQd zZ<;%Mf zc@RK1(A4fBu;LP2WFAI+qYE^!Y2tD*|_KL{^~f= zFD`e*>5&QtR-jzPjE?KMFS9Ii%nGcW+v@bxI9qQD+$ChtBHBf+4GDTeQmzgx9k*1> zHDj$2E6jzTBxZQU3Zx2@PDNxbZQq=0y`~X!Gc?#|tRsvNwsrVVVPa;ym2Jw?Bdf9T zpyMB-A9?DOF9o~1uugJQp%L%YIH83xn{)X0gS;O5XdcGv)!_Gh8e%8g;8&w_!&% 
z0%W6K_;wfl_xOrY#sG(N#lKeqoz}d|E0?B*BaA*W;jXE!0v~0zlYa1(*`d7V=2n8Z zmsc!R61b7sFg?ncb7|ROiPEUGzt++1dKB)l4vZdQ?_?@(uY9j%Ue&5bJFI;17W(qD zd*H51W#;4W|HeBT-h9u|a-ZCn7SuaNE>*gdw%4l`Pejtr@=jfNnscse4}cq=sv0_s~7 zr7Z@Bt=7_=p~Va=4`kY=#T37YlO1^2z)}xjaXqf~{oA!olhd}Ie)|U&Hu;$0qWva$ z?)fjo$8Al;ZY>>zUkmvMfc?PPxzgBy_7knq2U zK$*18AL)Xi`U=PQdn+qfO*kiUL1q5#X?R{t#ie54O?SURwfL4S-ODUDUU}aX<|gZC zze!2OPoAkcQ^0mnNwU;Nc|Uh2hn(D9eU4^{Y2kquW_c9K_4hlju3rkxjV~R7{z?dk zvMf7!u@a6HP35V!HJvt}jd$`fE$!$3T#8srYlLqyFsX+Au3!6cqtw~$Ei@vGT6*#Q zdGW!%gRpymt2?v90qyXxwCrEsYpJ@F7iSY5sXx{?PepVa#v3(N^RwUh+0`Z_2*ny) zFL#AcV}9Hyd}>Tvea1|LH{r*QW>a8-ICq zsAQb!Q*TvmUv`*7D;Ok%V!9`05&6=qKr5QK*W`X1RSArf+RumH(i#oWJjJfIe z-wZm(OAXkbqon66ifrI|7qDSo%-GpGrC`ImCXi4L=^+>VoV~n5`KXnY?S$OmQLnT+oOSq}&n9rY@C(iA?#;=ox%F3uE_l=%QPAkKh9z3a) zxxMg%+Xm7Ml|AdDdisQFy*e%HOSOvD&^gc5*lxh{a0l~5EAFQO+!udMF9bWzN-Jw6 z0;X*uTST!WF+>SFNO$JcJdd>^yrz!h$(y$6I)T8db4>S+x%;|Tmq)~BgUr?^rb>p* z26|^}F7)Ahhs>$ZpD(67WB=ljdZscwU1RmWPrAeDAS

5q{BK zbZ?Wcf4kcou{0cdTVFqP3fDOx3@*P?q~pGCxcef*wme`YZgu}TvwNqKi6dg86lwaN zGOst>_!1x_m>A!yKSgL*F!3d)%kfoXbk%lffK(ooBJJ&a{@g>Tu9i#VVf!6?5IP4} z6(|o4$5O;8?~1X&?P5^RNF#howN`%^a0~y;%X_ z;?PlwtUCxFY`;j80*+`Ao{MZ@cG*ZASJPac0Qe<77 zH|j2TyNdxN5?_j}u5OGlN(c%O@1;nCkNc^!zJG3jL=fOgkr$zJ&#}5|gb4)<2hUQZ zxhF8dyRW+=yVHII1c-*Z6luT8{a|E$gh~VguM}ypA1dru*qcy*vHVJr)&c|kK0NkAa5(&nkUy8KY z+U#_@<4eFq7&s_J8jKD?=vKF1K_nu1D@EF4r;($3FHQ~|lWr-ZE^cI&S${PR8fW_7 znDF2Kj5PO=_7@0~^SU(m4^|@=yM2qO`|+fvgPmp-c1C$m`O2Gf<=PS`ck@7n{=2 z;BF4u+s`KEH4B>SZzQF)XFz31Pz{Tk6PKhEG^jsmPA@rWAOF!j{^TmgFK94+CM9$F zP9`N&Wi3>W$J1yKepWRc{LQSie;;Z7zIxDKa`TuJ2%*LI0t2DVdkQLKL3sC*;*ntm z>|SEsH{2f>wC6sv{QGl260_1+LZZ<&{zsv~cb_E9y>}|@{hFN1*S+FM%G8;Vl$o(+ zXQ9F8-uWmr=gzq%hvwbx-jS519osh{OM~6B(p6`Erg7?71oU!V;&88o&MCPLGXXI>VR@kw)-h0vUOG}cF3^`<%rkO>W{BghOj zJC^3uCl*Fc39aL$&^WqJ&q34Z9V!%^(`Ymo9eQg_p)vHG&`b^)d1(>)1Ua0!&>nP; zv@{3(pFf&E=S)iL=a0tE1qM8FnzZd?krp4O!ROYb9u@~=kiBzR(A+twgSkaL8atP` zs(O5;j-$cL%3uoZIrof&wEqTO3(fxY#CkvLtQ2WyLvY z>W$FiL}}1L1!#eRNps$k_T3pd;GlW81Qh_23yrti(=9O2Vs}J?-7PRc(T(N?wZyv7 zl)IW9tDr@0S!s~FpgnG@(j0gDZP29kcH?bO=hAeWq8}Z(p+E-hZBsm&TZQ)8jZHdj zT6@lgtf%f+TF161>F{V9EsBGt5~0P|Fj7+m)!4|6B?Pst zmNajUv^16!lX5hjvYM3^v1>XssLa#F*`s}=8kO8F?vSQjhi=hp0wq!2dS?wIHp3E0>HE zU{xm1lu1Z~ziG7hzdk3;Jv1Gyeb-mg)H%_iuSR20zL)mB-rcieg)oX|t!hRz*E?C7 z^I8ZY-nI;Cw8(pDkavo-$2%R(@k&mk{dAZtgw`u5jdyn!0q!>1+ucx&pPfr{?PPO8 z@#;>h(W0(qN@!3wBq`J8<9gib&^j(PC5_{Hq@!)zISEy}Q_@~s$(b_LG-(ehS%E1G zn3=VxNofq%GhGt_E?mvTm0_TQ=HM!{-!z)Pt>mQUbZr`q--a@IY5Vr=m0df@g$6ej z=)Dyfu&9IPUdQcPj@EA3ty_hr4&>Q9T6DV!S>$u;s%w&XYI+4tB!*2LS&3i3tTgy4 zWg*L;xz|YhP-e0;cS&h&c@9GBn+bx2D7jH+(KRG1&ov|sn#;@zXOfqsd50#Cw6yMt zGhI^SR!7^`2}~L^ZM_~1ZYD~0B(ptNoJMorNJ(o}Ytoo?(w3D1A$TTvbv_S+2HCn; z9TzYw&37ktbTn&mrb>&`ou)vz2J>s{3Ynsg**q8_13)2P^_8M)A&sw@;; zcOY9|8@8z#D@N^+2BAu*Q7aMzQM;(U8dOW|s#-zp-4Ya`_RsFH_pH5XQ8jB;Tct*Q z_sf5|f82Y|Iq!JRd!P3?SOY&@V|P9?N9zBCP3|pe%|?Kgvt1WW@6bV9T_^s#Yll-U(tUiv_WpWGn2&WevxuakxJ>CXtSZA!Yl0xwFp&nmcoMpEnoOqD&Z#D 
z?Pxi8kN`jBOr!eG#>6t|eYKRR@hGQFu@Jq@?}|zR|l+*f>cn*`6jC14`mnUw}fWtckl<4V{Qh zJ<%QhfvSCRM9-tV*$qAmm9{rZ*H*HckubiuG_BB3Ir*j0p~5o(xo;QoqkdxpA%W^G4m1|?se(TFX10o)&1 z*)F~Cv?|b?VBCJ|C!6eC zs5Rj{CgJNQ-OV6p1Sul{5!-qmS4g36VCcdTw)K-jtySnq78WKN7gBRdU&~pf+dl!d zK)Dc5IxjV<%kXU*bxtsKsfVDm&1*e#c4$3gMdVB_mzdm6!OWXTFGt3YFn928wY{mp z&X@AHFxO^3L=W07?)r%zD8JJ2$mj6?K|@pSA!Rv1RNI{_7$0SG8&}Sc4H7||h$LPZ zH55qd_}8YiT1-zmo_dIj+!{TO#J()hb&|pkj|=n8h;qON1a-AAaj}->Z~q!Rcz?TL zUhC9muDGvFzEEXMM0ivZI%R9;b6`JJSl6C{#8$G;!>~#WW$HI1clVrMTVp{;xBK~> zoo^LG8kvj)V<+xKOSw7h4odhKnsMjnN=f!puj^rN8EsXMOmxilqRk?w1Z)InQk*rnJfwyIbH4{lCM8r1xvQ{y2A zCmx<>96v^?QVO<$!X=X5^De;&?;oYncoGY^M}1TX>!#EPmQNJYrl4*US(H>Ide7Bq zBt3whi&!y$iw+-nU94Y)>+GyT%!tx$luW{ZWlbaHB;g;~R4jYPnZVHadwaC>oCU&ap*xpPj=3%0w zlFyBu;vPn?TY7nxSgLzVmzPj$WOGgOWCS*bvXPOqE_RjNlAf3x!H>fovQ1uLVhOBe zBtD%*UBK1Ud5=TPQe^QK897ZNC6&2-LB06+EY`@L!~l(_+DkNkZ45oyJYgdJmc;VzBTKJOYI5!M=WWwuuLc9{K?s3X?H%O zyRkLH=3W9`Ye~_Tr%^*thgOaar1)Vf{$9A*X69P##h_p2zYAIY9YPrnGzb3(eridm zXgjs=IpX@1>4#oiX`|>_9$1C@cO0r1FPmg+H151{k+b27mD_2rmp)}-c@-@HkYxLR z_BTDfR#oAOFX%XqL=?V10*J(C2}kTb{4cgo+ZT$ZPOA?o9{7+&|MLIvLe;K@K*e`@ zb!GR0$;i7?eTTbUlV|QV{^#s%vb&JZZK~Zl^4wCml zL~& zRJEpbm>2RS+TgRGy7-sfuRf>r>CrTE0hcQ_LM)bn(fgBqej?JaRw#<-v1uESLG@`5;qr9nX(68iT*Pbn za~nSkH_OiDhc&%Ud+z7?>jzQP)bupR=+vud!w>mgI0uKqo(I>b`};q_0s{e-gpZV} zG$W|Bn$ePXBb0VB3~I40bb93D&uC=S+3PpAv$DwBA4!ED@(D;{K0faoYJ>(?XYxdz;i^5Tv-sYQ{A+ z&0Z9Xlke4b>kT?kjH}qUF@yEs!S-wBX#6QrTsg^b8G#!n?QLVTe3`QTM*(5D$o1(CTFxPZa)wIe6MdgDXL@D^ zZCTCjd-ah$#ETWhGFpdlg;qNH)`5}sJGyir&;rAW%636;sX7nn%^xQY;rl$v(V>S& z)=#xvJ_(a`?HZh{Le%X=LJnho8+sjjUl;kS9nv8-yAFhK9sJ=bY?r$2pN;&Md)6H% zV5r>Zd(EZS2b|zyI<)|L;mdh*n#ddd|FOj$+pIPZ?(&Jz8yDwyR6o5Pn!u^E{`1G( z_G`_n56m>QRQzvsE?q;9=mcm5R*Z4^p}t3_hL&YzvPV8&#>Secy{$$5Ny{tWZ8=-| z8t&z=bJo-|54z)Rn5o)+w^4yjp!Vu-aIIa!k)nVp{mkrf3sAin=#G5*Fz&4Tsk8#r zVuA6QZ{UP;rf+rD4Y;(H>^Z1^uj z=@5vX=+Cux(?DIF^In|J*c_(gmRl>I#O>j(`x#fJw13O0E1h1 zsM6#+CEUEjN=@Y_)_uU!@@O!u2>?Ke?bp)OEV!2CY1i#6bv`Bk51M7;Dxnchh4pnI z0c(ipOM7*14jk&pJjHb!ABvbC>Y 
z4liffMHCc1dVc)W@;V|4wc+m_91 zAjAYduzm7m(|>`1DLL9h?Aw2!=a)EY%9yj!qpnh~>*X9C9hQBX2fTkeN@-t5=(Vtg zALd^N=Ofvqv5#kWV6UhnpMI#6lJRNjk3b_LJvthEK3+}UfkQ!@!OMSw$&d)j=M4`} zbKA1t>S;#Q@v5EKTDxj>8bWuMu8KOMpYU!GYe6>;2mC9%QU#d&f5)ucU|G-fY~PCcd`{0V%LBLm=5fU-$yolgde-iu@=x;+IpwrCDp$L-=0r?EAlX)oWjVX> zU%Vld`ihVj{Fs8We9bzp_a*(JidqMFCfheMf>iX$;y>7E@-xAPSkV10G z+cAG+7%sgq(02d&PR}+qb+nd*a5#jl#uusWwRLSKqtG^XBcrT=Id@Tiq`5YPPkjog>HM zd)C}vv|}cDeL2l;-C*MIzdT=6{<9rmEd;7G?p65{#iT_}5skAE>t6P@u|BdZvj>Uc zVW^q4&t-@2Kfmkh`l_C@9Le;+X5du44EFKtz#6wFpeXP=U8wH#ozKyP#zF>uvrZfZ z)>`45S83U1rqSP25&KzixT{+8K9Y%vnT7gvQDrUMA#gUJX z=6Jo~?(cw=D|*lkWK+=1_JA^Grc~V*NwoZ5LjE<^gX%znEx-I(hRG2CMeNFdjMt%3 z$&<&B7F^%i8CmzaZjWkXZB1i~deL)#-j50K+}bfA$&7y*kDb`s3g8q?jUkIRUSNl$3>p19u#%h7GM;bXCT0$?K@JPw_Chs z9K4v=f__!@vo{t&7`SDjC&=& ztUM}FkWQpt+x?X|Uf5mmwSX<0TUg?;->Du&NkP8dNx9ate+C)4_nBs>LB5k3bKKh9H4tC zyYdH~*tz0i7DB%N0s<09OPDhsTr93Xc>&@Wj8Og)@+4}XN0*)_WBbGM*k6?dej|&p zz>EGeBX$uwR7-v^Q@A12!sUav8Um>=vh(rp^pON%zu@v>cc^V#?ZJQeZun$b8r22Es1r{)zJ@9|pwb@n2 z*s*pRWo+s7N~o=R{KRLOPEOFFu9Vt)vlR~6Sb-mUsb^pKt~r!?q-v3-wnx8wLNfGv>PJ%e+ZR$qvS=2S&piU> zAH=Hb*sDeDG4Rsg0F84shyF)Pl^hpuyy30=F-iETcH&0`sQUK&MCSZ|@tkxxhy47# zm)o1>I^E=bg^foaR!{a$Q+}if`PQ5|XovD}!6+G)c`lmHs`nXLjKYpHg;r7vefE7b zd6D&D$F?*%d3np{J&5&iv!67t^Vp@ReoYkYeNpvIR9Bz{(p2=3^!sjd>gZ1%-{<`j zcm!WCql*e9P{g}?q5F6ma-A1Hx}we*sWS)vRbK~#rVIs+{gxL-$9Q<@8DC#r1#F%v zSl|PnUN~L)ePB{j*Wr1|ap-eX`Hz-1rQ^@bB;mc!rS#yqsNjH*W!0IoRGe1dar1-C zq>E%xx>wr17jUhp0;qz5t$*b;9bMLqfz`yZ)eDqGleBu(=cHqeewZP2zj9^&oA&Ax z9chj-V-c43_7?23;u*8%IF&{F&&?3Pn)Q=x{kl8ZVxc_CtcX5 zKrSz6R|P4$3c8zj&pTK}7?M6zfSzfKuxck5J5DEQQ6RMW{ z>2g6MRl%`Fn{(C29!kf(8ks^X^i6vng!uR)&@k}n>O;Tr!rT0CAx6o$;+ZJ!UVp-hkO`s@phHkCFE1djn zbR|V>4oPFL^W=FzY+s5@PI;D_02fT*scgTp|J8qVJ4J}TpeNushz|cKwwr$u-u(<-~8lsF}hB?&~r~wT`uY zJp86iO@|~JJxjd(vgU5buZUrCa=#0|gzExzqqA*lmGl0XW)O`|cr{yil5n4p8MxnX z*Z(?1yV`=A?P(d4VzbT1WPA@<2ix)K(pvBfuUKg3YoV&kJ!_An^LPvB$JXHGff3ra 
zwl#;&Lx&bwJ03=FAdeMDG`+_yJ^268{Nxoe8$H2kKQdUV=R%@1){`(`oQHOoKQUf~nw~qfucD}5&TVp>yt}Tn7h>D7E>r{5ZYtUEWE;y(c8P?7TKgaH z_fpD-^NE0O?SwFx1=t8DV3C}y91pJ-Kd(Ly?AY&sVx0#chb$)bGX`)XWS~%63eP}sjdLqwx^8-Q`9lRB= z^V_p$fi|;7M$%#T<9>HIZ6z{>vCr>B*Z`H_aOtTN+b zn=4cRXP(df3OGyd-jNUI_3FQ#o*wnQ^BU#cd)$oDssr;O$rU%ic&CT2Ih2sxJZJ3Z zEoXBl)07FQZ)Llx&5us!r!>PgZQ@$p)lB~d{OF3LiKaLMN-l>~o1{kbH0vHOcj-7H zqfb2n-F?LDfB?I|y%qoad0h@NYc>P_r3%wDDv&4?x{@D`>?G$KDzM8Px z-};a-+}+i2_+WeKpjuu>NW=@Epz=9&M7Q$i`__^$7qLzL`>^{)kmm+V8Ewux!5&Tt z5F;q1sC#?m-NQdwwT5Ax;;2hyRlftr7cK)Dy&D=Q{!at`YftId#HosPx9zT=vh3>w z6k3v24tW*W3~f)=p1&ABXQl=*`%Nk)jcM$NGt;G>r3Jq@6zGXAp8Q(Fn}1p=IUfjPl=gOxW%CF~7D(HpzHy^6?;fR+yR*bZ*=ml7F$2E2JXrrBbcwU%gUO zkn-b4+5XC{jQ)+L#^az>oMRZtsoNJbPUUB&(LllI-QV@*mr?ry4&2qVg+F}~ujK{9 z1%;Hh58M8s5gI+qCMVa)G>kNKOsne;wsRY*wRsd6g@V7FWe5wfq7!ykzPB9x%u(m$ zSwW|AF4x5WP`SqjYRh-tLbgk0*O^e)%tOJu)LkH)=U# z&BTWKWNi&}EQzw_f2y`hbz|)-O6=R01D_{JduJ-90s?`vSK5jE7Y`z!0t#nVT?HD? zZ=>vSmFI`ABghg$XU_ip=W~5qBhY?f`z+uu6PQy7+`TW9{bvjgxN0daZWf-e?I$l|CImkqdi%l(*}O{+g>Vc;P`uY^ z-g`u}AuAsTn)h0M#*-$WjGSCnZFQh#?@Bs}>TTZdOC}|1UF3#}7)!RuEV{!d#>76R3wy_|(lAHN)C;-@X9SQzEAk=PK3 zSX(Uo9OC~Z-^X6yMz!&VO^4^l>Qz5Usw8}9?J9!CNjbp14ytoUD|GOhseiDytDCpb z=O7hLLDBPZlB1rSEXXT0|1%ALt|1TY>Nve!0uA`aw*sZ4eehvN#75Vet;7BYCh}CF zz#^v)j0{4M8%IW~Y60WR8u)(Ab-8}>s0eM-@1DvF=T(wo04?nL(b>q4u9pVS90Kq8 zRX<(7IMBU|vw{S~zvHgVidhioPsf0sGIAG#M%89qR z^?}!5R(kTs|7=wt7Qe3=5=ED9`H)nQloa7t_UYkQX`>%WwfP?|y3V+nX`*dERz1zW z^7G}?!}T?>#deGz1iymoOjY-r+kNCMRH7J;r0hOjQC;>-9uEhm(kAYs+L^%#y9Jan z)Of?QWA0^5VdTF+FZ|w8;e@^60f*wVFS9oEk&zmY#fYDHS0CJAh@?oY_%w!EIA>$w zqd)rg#Ia1k%nqq?u96^`#*BNuHSxG*QE`Aeujgl2ZO8JqZLa{Qbn7f|mU_s; zHS$P`HS;sR_rWWl_5=3nuA9kLaaC-4Ca7xEw;jEV5d1qYFS^-<2o+&g=r~LD$QwVLLw0-V_`Mh@bFFLV2 z@D;u5j_XWqr!|{6^-#{gMdV}~Yj!X8{;81uO2L{Y{wiZQB*aT7P#BCU<|mmY z5jEE|*rbRDQ(9zT1@E}9g6n0t=x4ghsNcp)Pf$QA>72P>h`cy0Qu*?6ZKVoIZCBO8 zL{YgjGIBayzgRH439Fe-9C@=m8_V=JK;|jFqPrOx9~%p{Xj~kuOe>TqD#jriYZcgj 
zr8m{4R24XzqVLODQXmNz8Pva6n~`)3Z^^s`sN-XG{bh6;1NH5v&Fo2n=E>?(`Ww#|*Tl$mvLfhv}d=H|Gk(dFny!^EugX)_ir@(t%E z2T_Cd;>5?#gqY;e);@LF5F$!W3_{rq%dmEybw-c4#e!LjAaO&?l3QdsZ^X#GtK5Vo z_$t#aEyAh{Gu-OGrx8jdrE9ZTSRq!@_8A^8`6h26W5E!(467yc0`SrX{+4EcvQ5%F z<tj(|g9PqLp&s=&T8(91&UExJc^Os=A`g4V4a|~`=+8EFHgguVoWz3{QI;b~ z+FHcEN3N+@BSEn^;#S7Tt=x~rF(yOkDEU{yR;It4&|+4BTiv_%;9R{%8lxNWqZxHH zDM-5H5$XLqm3zOkv)_tMXgL<>JITpsUWN%JjX1v&U2_`&*8S74e2%*@SBjqt0|xG3 zX=)xq%*H&36193<61zjFsxr1PWq;KQdBtnNZbB4B#iL`DvNM~Y8NINIa`s_}fxp>2 z{pgKkKMi7EtLmH!b9dDacfq2y$ur339sLyAlkk}+*??@0m2zizmf|wv+V@0v`T$cLwGDVOm z>>W8a;2^AwsD26Q+K-SZL?aD4){vBStB#)7hs%A?FH;#i^%$zXO&$l;wI^wTn*Xko zp>Kc)O0Y_r%6!w`O~p*gnH3B<`e5=NBM?LssZ7gOFKkjsL}d}KF+>Y|=CqWPtYDUu z!4xq|%CovM@0uDGj<}{JyI0hTOXUpu42_Cdy-+oFT_`StjO=sY~`IWo(6 zZm~x}b%iAkB=PB^hH|nuB*=`i8Ra#~5*``oHJ*jR=Ua<=WMyCVSw04||I%u`oFO82 z#9;RxG?qj3on4F?7e+>YTUhkvWxp%2M_9iIkuvBFvioM|*^0gfE5Mzl6vzcrT z+k9_rT#zMe)V_YXMqhT|xB1OnU7;ff&y9?WAHLv-~IhYbqFLtuJw;Kf!MShv1CXVm7TzB&6S z9tfpIjaiEu`V#JZlz`4O+U6NTe_y~#f5h^RU<1>j5wqDePSnc!0UIH6v#HkONkw#2 zZ$@|(2=ZuVGOX6aoPADi2wPXqHz|~EWR8iuVP#EB&UAqSeffBZ8HU9i?mfit$eh^c zIwoEr(;#5w{Onu3IB*XAAhxcoR>^{m$U;QPyI>92q*+xhsDOZrKp@DMtY6`J6QnGS zY*;f#Oigvf>$fx-X)GyBWt)`<-wE+pU7dXs2Nz~J*~)b3+n`6bKi5+pn4|R@3$m*9 zvK-A6)L2 z43L&A^P;du4mNrEtddZOT*D55F)byfR(0VE609|Z^@j_H3kxTW7#hi=c(T6-Zj98& zIu65SkRkV2Avd$lP$rHQS=pN)$qGeD1ryhAX_OXh=eT-ow5utmKq6io6mza`flo6N zwzB@k>bX^g7vD=uX{cW?^hwL&loNCFCXTw8fds@Rg<4898b6k}cGun@%x?=zj_l&( z1@&<5BbS#J}NP6#~cq3?uxu3_Zs*&+2r ztkLKw70!rA2?j*<%q$69Zt6KfX7d!~@7J}s<7+(*O`Vnt@8b)TGxZ%MB_a~4zu2Bze!x4$$5`&HG z2fkC>*yrT!jUB*n=NC2PAw;O5b&>`aiP(t;`T=r~o+R$nhLli?&2HovPCAzdSK$armsaM zrBc{koTD*{Z?ShYfu$&bw!mP7>|$3SIYRYVJuA3?bSS9IVv6?}b=R_#lLL!bSube& zmoT_EOlA;Kq76G~Stz)i3E(CtvGUIhh<(3x41}%plYZ+n`x_C_bs)(S>WtKE`isFa zIW;4yVI(^?+@YZXBq{FkOr)=uNat+y%ytr&%MbhgcQEM|VZwq#R3j(Wammyq1M^C+ zF$=f=vC?PX83x>3Ir~u>zG9emR9{4pqgG#5cJCaS;pYOD6WfA(Gfj6v2scy_L;!!P zEoVjXjsiFJ1$0yhmS!VyvMxk@twKhKm5xpra1eZMwchg$Q3)a$Ozfw=g&Z^HF<`S~ 
z5EyMxi57y`Cnp9-QWnx?!6v^EZYq{z&F6q*!e%pPih)(sN*UlB(!L?*e1}F@B=4Er z8df)FtKgn=^slEOM<)U<)3EEMo)#xRfx+eGxkQe$SvPs zUsf5+G{H|PsKA_5Q})x4iel=SAxNehGQKIhb|;O1H3_dZ+}0dT+>C|5jpM*3!V*No zXcJ793#Ohm%@lGQ9j-ceYspZc(kT|4qigksra^Vm)i#ILdBWD-KGkg6DTrw3Kg0x} zfWN798MB2ORr-eYLKQGw2$JrxEo-0Lo%)vkYIdbM4}@VhVz=H6LLL+GN(7*zy*%|7tBrsJ8#^-o6A^}) zNtNA5-Ne4 zqB9Ww7t8SwZC8uFa!B&P6q0|!1&u96B8>{70O6SUVFJ|sC-Q;+s6k^nG9FMWhKOg! z*6V3;%4_7}0gJH=_avs=B4o{YRRGN%ERrHb1{YprJlX|9-=NA2&8fv|nx&6am&YPZ z`?71XckURXddp!j+ED|rivY&OhKMRjed7^nWcXNkmRp8sHJ6k?&}f)7=VhRm1rUI5FuUL2Et5fn08|p z_e6iQs)b8PZ4fFKjS|GjDatf&AsPqO3HW-L;%x(8OHO&wsy$%$1jM`hzL-%FK?h3$ zEj`Rt-iCd$RXr>BjgIRx){re29z0n>P~ZW9G44O@&3s1<(h7)CCN&j^g!T~DPP(KSy;{@*klwRjU7;e`@C>N)OlkVmrRw3P(B zOyXr=2@Q*+jJ|GQHCl+)#e@>rJ~R-NxWC6rDT-l`M(Ielw3~at=8N$WQS}w1TjUlY zmjTLZR`25m$PR@ML0dL%5qQQ&YJnl?%DOxdwz4lUcxCg;q%#4W?P|>OD-NRVxbPb( zxSQ4xx3!hlAmlsrmDCCQJJ~4{+>+57J!K8 zO)(Ai#2~<%7PcN&wL3n2+wW`;Bkb%E+$2G7;0($e|?RVvE~uzQJzi5R?1 zq>Kb$@d6XjUoQ@@^Xo@9xD3NZM)OrYFB>Bj<*3q(VnS}ur6{Q(_R7dUOwGVjUMwWBRYotKzwUb) z9mC69)~S_&O5wL#D;15Wdud5qGdNcn>_Iv&yhiU~GJ($5ZI??Y0@ILg##+*q1j!LU z2x^2LC_82=7M3N=J5RhX>Qu)H*XT^ISSQo}scM{0mA$3!!<%1JTq`mvSsAw8SY2Kq zHjw-ll|)9~&`rR(Y9plGhh;n>(T3)f6^$zkO|Heqhy5q_xW8cCm~x;K&!C1#VIA`& z_s12f^}vhf?3@Kw?aAEvsxc!cI!azJq13PAr=C_`R9MnGa$`KVk#oIRDXTZa2;pcz z=oyqAE>B~B^&Zqy(p4;|Zp$*(NY9Pqcw~Y}|6~7{QEMbVCrDBOo>3mzclI@>n4_qZ zgsg08p^qMm5Nh-@54MDq&g8~75D8qfKWZS6H!4hDX*x|HXR*p^=`PRYr`h>+VHW-8 z!_6|Yy;ZueFb8Y}-zVl|6HEBAEv%@Ib!bJ{(v}IKYzdlWadQ1&F;23S|qigO4sQP>(r}488{8bVgcut zvgR~ewL$5Yw1z;FromUyZf#)X`f?OqWm9&EMm$|z6c>|@?tv#|1^k28%c|kD#sMkA zN3vq0x1;80H{sYSR#vLGMzhBT9KBQaAjR;ZBucSSvD{aGm|vC+6)3v+RVdcyBI6vCrSA#1%u;z2fW)+jlRPNa5Z_*U^r zB;R;OBi^m1s1E*&lpNz*-IPiqM|diZM(^84q0B7kJb8<~u)Fq@-Hki<-jo;BnOd+C zEI|Zv7}|$1ntti}vwr#CjudvctK_LOjn^}OsGV8yh2QncQI-y@hy2BD_sg3)=A0H=f~xAfbndj zzhOrKyl5?3G?69hYD4Gw*DTX)BEt@qk?E(n_e8TaG=Wh%L( zr%$#rEalDO^Rg|u6J!e86j5-FJCAimHnW*C`+tPot6#^`t>UOe7_4x&W!d30c5hYA 
z$~;s@nq~C%PMuwB*f%E(3GGZfHd6v5vj8nCIs+q$g{s=^FO*|J$pOuqMv~)WsBWEkEfQ>tIrC``+;2P*h9obNHK^V$PPR92(6+#~|}o6Rp` z*HYX?5@f_((@COb$(n`iY*bjh4s?CnWF=+kHS~$tSjMI3W~lOdGfNtKI4lh)iTyFM z@2o)7Pf_V-^|K^pP+`m$Loi|QHy82P0EzBgrUZ*QPKqUSCt zE$zTP9_tp?s-98udgdN^y@xJ$l{zb}rzR%$Womx>F`5>0t3hFIMjwna$IQo&%x{>c z<6N8Tzt@$}`-SID>$CaDicJ*5O&%FL&*$dq`NJ0)%c~MIe0oyJ$U&VW8zoKhvQ1s; z6oVxba*Fd+8zK_uPMXcDx8 zO@p|7+8GW~5rj!6nRYV!Mc{G`k&$VsVj=~4bke#JoLVGwEhW^ZE-X1^V&u#P=n6Eb z(`l4}bSs)9TTUZa*BGc?*<}9?tXFh{C6LTexCk(j%CnNnPq$VK33kfeqHmDrlpoqH z%AWQM-|C;TE{Jt7@e7B=PK@Y)>|Z+P@b44YOw8Phw`DPlS+4~>AYwA;$qiAl>ez58 z!y+kAnNZP23`sg+QSR36FH%nc*wy~Mfw8}-_bZ#qP^Jea(V&z+vSezXbqSPcM~8P` zEESg|Y%W^AaD0W@aPyB7c6YmZl0Js5-*{rj0S#$`EwA}2BKHHzvn-!QO3U6_= z3yjRFU+8N?HjN>AS2N5&YiQBU#AJvLX9kSKq|(`ysPL+9&6EPFuG>gf+>3K@Dbdok zUnqur&$ggk0@=Gh(*N2?xw9 zF#|0vKIb!SJD5TX*wKuGb;wv<%P@kRyd}WsT1Dz{hz>p_aDQh^P}%lR$2~aMHhaw{ ziXNZ>wa-RP2H_5yX(+)%f1dVipY5w6Sfm(ISGzt`lTQEe`jKxJ+N#f;w!O-ZkHOP~ zt~Pxfsm%T1Wob|I!C~V+3n=rCt6$SG>TEg%YNnz5q7*SS``6SCnjfk?V|gr^ugiN% zzlK>@hc@(|#%q$d%-8U%k;!EHYVRyuNm(=UFkLt0PXk|wd(?K-b{O64)dOD(9eg`) zAE>^t?IQR2@rROXMR|wtF5J-J?0V-cf4!jaYuk3nVIq%Hc#jM5qPgN&Bl#dDo}xht z_3z|!0FGK8mnn=p9GN_bTwAM?Kt&%c)ctbB7YN*gqE`M3?r3Gdp&9ugP9fl=^b-f3 zI&wGE!xzr{Ctb0V|k?d+vIt2M09 znoqT|zZAsGKy%3$Y?*iW1rsd;=yLbIS<%4hU^ntx3zK#aY5(=sp5adudE6Ll>yZHZ_j;qezeU+(XhsKwYNK+-ct=Ygxvr~`ip&|3(BkDgL8vg z^SRvbg+Z-Xj;H-nV-UT6o2WByYCMC#$PhZ@1j9IyD|EX3Z^3e0)Q3{l5V4*<9y}qWVL>BH`2o3^_D$kVb~ff zWUMWTxtUzX!9L(9H9jU3dz>rR+gXh#r_aEnQWj)H zq)lAed2rJ8R{_$lD`DTH{6Vd$~e&Zme06aPQK=9ZE>VRUXXbP}1)G7b`@r2>x zzm?P4PG+Od^DqCvN>ET@j4?7q-%aw|v^9c4)p58&2*jSLaifF^YG2SBD%;%R*2IN?=Gxd_W`uP)%DGo!qdVaCX-uq zo6%njzmPs8OJy<;O;JEUTNqMtoLh*jmUiT%!{^TF=Y zp499&Z{`5qI}NU;d#oFkQeIVApUzhdZ)mGQZ7G#lpE}TvNdNcDIN?`WEYdQ(X--5}-j7(?&fn(gKgq4Q3aR_Y#`)p>M`e;r=SF&K1DSHl zdqEFLMP^o~ACiQnjOY5Q6pm$iu9St~?j)S2a}U~3;B!(epI?9Pp*XOedcWO4aY0iJ zJzCof?Kb*RFVOQggC^Yq@_0Oqc#uBVNxt)C(k#{zW$4u5dk@i&K$8~cb_BU@)MlpW=$0{Wa zR+ARYK^|!`GnH}+Iwm=BCw$+fi`3=L{hepq5z$GHOY5J&t%Z_3{;jcCwxEis(0F!p 
z;Kk8-sg7rVME6P?GD-5URS{$>n?Vg|l~;5R{T>j?S1g?M;?WHtnJzRU*Onu0 zbd#uONUohsE{5COkC!AhRe501uhZh{812Zh_h@2AzGk0!>HYO}_``{UA1~o9Q{@Jd zEFN+e0~8G}%rR7kg`tSZ6qmPC3fUK#hL`cD82|f>d`B7rI-NXGVQO(UFs9M6#gw#_ zTlAmeeP-m^KV{OuH&WDpY-QZD=i6E+RXY4bQX%rZ8h%}$k+azrTI(}NGPQFk5op4! zUH{qDyeY0}%3#@KXZIRH^=4Cz>{9ypj(SnayFoG;{Yy|Xt!&FDA3;l1cYE`O?N(0i z?{OJD9Ns-9Z}uNZafa6jHXIC}&7ZSbp1IWAa(VLJ_8sdJIS zHh3Ooy^aQ2_Ky>$jVkJaAv?*>b0$I|lTq~ZZ;H|o6P;(x3d&B6=3+_V3gvzv@ZiI7 zvEFP1`E{;+0Frk8`{L+hOE_cChEfot2^swdT1$x51Ge}7k~m&-?K|}ge_7byNQ(ET zV!bQOO|3BWFJAI( zXcslt3yfpORwGX~rK1n|2`LJ;kH#WzZxU}pMdQ0cT2kziPjA3wLeJs%E7($6pOPuF zYPA;6+&`l(j~%gq9kLdvJeBUUFt<=Z$u-?C9{WJZbScB(?>JOv5$=FH<$F_Nb8`)7 zj@(OxR1ty&D~X?9u>>=d5l9ufB&xk|>{;uuggS`>jxQxAq`5Tz8Md@nlA`_xO+d20 zK9^WQfMdZu9Q#YBV;QGS{AcX=Y)GdRG9XR0hiJwj+vwX6QxQp)DWDQv6YB*KUhVi`xq)eOvqX`Lrk=zAR;le zm?d)7U>=?}#=!oAx&RQ`$wJbBO@$J=L2=wypfvYAluY@z06zxJ98{?g0jqS?Ww1Nm z#bX=0nEMbGVo3u^etJq!oH+0fN!vTPsg2a+7 z(3tQFp1q7TFAC>+usn1b4$WxVZ@15p6$J}~&vnObPen15)Uuj2XtnqO%q_#%zPUz3*kL+QEk2$&lTx~H=(3jDSF z3S2PKh6NmyE1Jta4R%OX-&0t)#4SoSt6o~9l3kz@k+6pzvSch~Wc}5c@IArJCoEST z41l07)Xe8?MdvKVB`Xilc!6QS1G2J;6e=N4Qm%@9iAO>phQ3wOfWS~+%2Ik+e&a*M z5-l08I%JS-bSMj8viZ?DdY$`sJt7Vp-je#<^_B#MQm8`XXRIgKL+h>dw^*=CP+2aj zCTElc@!H6tG5*GYcnVmxsLiLdO>j-6@2wuefrzd!4>{LB#YPN8(x}&SHncGbE8l)2 z@*;$8ksSzg#b5UHbkaNOq4SNo>{sbIP>Pc$48#-l{@Gfpqc#M%iw@xpGn~+7PEd@* zr(pYli&9oeW=oKGp^@$ZT6`22Th#Z};(GK^s~0EYs(=vTA_kjXr1no&{)SyBJBLP}{{g?XuyoW<=m+GJv$j*3lN&F z<`s79?ooWPh)lA!DRGDgtcb`@MlAqmDOvEy4)dA9XDy#^KuM`m=I9h;UXOGrz%=*XKCM8;};`;|Xah-|GUF{Xy26FXvgGp+|zl)*9Z9c~!2UBdq=k@(k z1@JO?3I>%4K#s2F9`7)^*x5IZJ858_j6@Bw+g^YJNQc|TZL+7?s0i>|aD44ZI z5{v3SLS^p+4LN4Y?(nto?5{gj?3_^b^1QW-{ov+Y_)V zyNNzR7=BtlIT`;7$aB{E#NtB_V!6~)@0Q)pWm6GD_iE*@p$)?~;=+DtYL4c;^o@P> zE9iSCevUc_d`H8Fl5P~3NRaV^h3<3Nv#s~W2CbDAOL*~^9$uXqo>J168q_-T`O$oD zIs3IEszR)Dpl!xVDS5!UTur2{j?BBH&sLx#9#tnb9nErWXcHr$84sjWUsTW>oNZy` zowtGI-z3hjJkFsLgh|0f9Zf#!P)A_5CC}buoUs75ctZ!@abgg5dM9lh9}5EeM~z$>k6@zv)brl>+z>^WZ44miGxhU~w-bp*xrI0ZTh;THGSH8< 
z^NXh4ph`O!l+J2txSL7qr<@sIGagn*^27@mhCE^mCyU6{FShGD`kFb|xa%Xg(fL_D zLA@1~eypl~Q;3hcu!$`}5uLaPVG89O9Pc^;ordqYQguN5xV%<+@)VGFxmt{~!XsV3 z7Nt@e+0#P-dqq3{!$DZ=AfXjn6Fo6lvS#F z`vDprg31rjjDt$)un+5jM`WYS)K0M%RKR+a#j_aAk5sHBgJ{(B4UUPTDB~WQH_{;~vCyiB zHUrkaX$fJm70H*1*+?xnd$LI+UL}Lz_M702DWr-NIq^wtgV{9ALoh%`%V!V-Rt~vQ zCEvFK-Fxq7m zno57G7!AEKVVK~JxTEAy6zbokouM5{)j2Zur2TvEQ^wiHUyk~W5nu*vbt=nU($0P? z`&DxSEIy*<-Q15VXW@|iWh$QO`z_BD^L}?A_vNU$D7*+zX|dG0yU~wE;VXlc2SL9#z@+`x2q|UuOFMBqsdoo) zMS7h1L>sbm3?OV(7D3gM3kt#TI3uF{)#M5Y&cNNwx2GdBXCahWRBI?n%+R5utV{Zl ze;>y*RdRFY)}F%`ODpQnC%}VzFXb(Srt{_kkI;mv&3r;|634<{M0h>eH3!L_G^8Io zCEbJ4a}en!$25WgT1U;6(74;M;3nk*#@`V*F!U&>oS3fyzrorsqR>>#7*02b1AUI( zv!RYNAL_h_x}4M8?+4s)nJf)a&e>X;qV%i^j}`vh$1e75SB5#v>kNLHOzn-5KCyWX zpdc+VdXpS((_FfD7|Ppoj2-Ws_x-Vl1OOPTG?)m);I2Zo3nrJ*-_6xD_Yz#0DGuN~ z2L94;X}|4ZK6kn`&S7J);xB7AH|hS}Zj1 zh$E+h-8_)sV4?>=)+3(Y^NVz`rpvN|WXMuEENFlND|qYP6AlD|z)pBXXO;6r8lz$% zTA9a7Z6Ap%*MCCNF9zFU^2A|WJmDBxr7WhHcLVw3SP;;s1hF9i{!1EC&kxh}IQ>9C zJ@40HBr_sPvvFNWzz4H+>tT$wvB!@xdSrk4DQ75M`>4^7WriyN8zGx z=yUtCrUmMb`3OhwK(O#l)JH;ufffV*KBU8Pm#^aITusm@i&fjRwR81<|7IOT)U`KDCyeo7(h^6bHCs^ht$t zjHA*}>CzX>5Ckak#5kLh+CQMFq0#yhTSbgk13@U^X?6$RV_F_%nHgaMA5NNwl=FnoWnKuFMyFp`7iPm*KHh1}o^JG*znW3KGH0!5#U z6_PE`8Wx2qg1qW_;C-0PLmL4%u(UTem|lsA44rIx6<}Y<8N>b@X3C5v?ylxbK@nA0 zP#KfviHtmN1AaKpR<7Z;v!0a|+ghY1tn%Sbc(s5!3-Qp~Eo?ZrKb{xqfI0{Z0VwKi zQTjuUYrk;Tp(uwRgm46j(edDnLi~~L-~u#GiHqMWv+3{BpUW-e9(%XB_UYRfb|#nk zz-W=LLW-n8w&uw%4oG@!w~GdEV#ATrKp4RA2!cpW^9zUi^5Yh38R=uSZ?`@Gf7TM} z;yAt=<7E{s<_En*;e{W2Ek8XZxqu0}`^5VxzsYnTVe#}y{;)rAU*vHUBT zj=xl3N!Jnxup?d1C_eSC` z$(NaFqd@WJlmq9njHc~y2BK# zhgM#?lEt?c$H{agd;NR&al zD6O6&F$gCu9>~fZyCVQ+cRZtFdM_`%JHl~piNy!VPpo)%3#g2f=;}(U%T7{ zk`JmR*=Id0Gm2W3g>mkWxG}f?VfQ?gD~n^I0J?HJ$KL3zOHE^4&`jy*7V<7Xff*GP z<3<4cy+U(NK*(MGz18u^eT1*Hc6hkJxTOD}yMxSwY04C=ys_5j21$9x*l8MRdy*!R zvXWUwzw@;4#otb)AREOH@xd8&w;fcU!gc?3ep;?Z>{jAk#1vD?!j3`gNC9%0cT zr&P@6sDTjc$SbOyDiww--atfHFYc)E^V0o;O{>NUi>?QxCJTJhh)XH355gV74`hR<3ZEM+Ro&h zK;nLR=j5nI2lAa~a4fm>M<^z}L_V2uyZ2zoHq` 
z<8fGBY0oYrEQIh5Rob3#3&*`73wTSnji8{L#V_i9Orlz(n_~ug8h4n`qeZnzjV+kJ zm}T(6|D|>dYplMZS7wN34}@Dr`-R*bG5{!kX4wqwL*9~3ZtW1Z<@H_J_>{~DS7clX zdGC+gROHkE`=ie(Zy`h)N%^sBrzd@{Af&&uTlK$>+YB& zM1L6|1xTWGi~S4lSLE=Eo?Wn`=TzR{MlOO`ROMz0YG7y-M#m=>8|02US%)r4E|Rzj`~#q(<0>on2! zgKiyV?w+c8iA{xA=6pf!DXu>F#;+Tr5V%9+|Gzn@j8f48T3YuPjA1(dxwDHJEev_(4FY>`(heOV5!t- z%C&%q1}@WQ!(Z6435zq*=ReiRBaj130a0k%acAA2(4oCvngf?D&_{2JF$|WI>Zp;b zEyfE&F8B(-5St6G>v<*2Gz9j(?bpM>a#^|oy&dt`D)5Z8P2Uq(d>w){0B>$xT#62I z3>Ve@#ZK){D2tHjj>1aInDMlF}vAA>q}lPifp|qKym8Vnk)Z zf+D;i22Q5l97x|DvOF|Mmd<~F3KihDuUe1cC1~X<%|kdX@A^mdkwbDx%2;P;=Tmml zIS4>1G1Nj&9T8Mj;;)fobS8*1o^ZG+qKj}*j=(JD)TMPy&I=$1{te_S|01wK(2eeo zB!=64Oj>r!D&%p8sT!iowY~&HfKXrXu(|{mTbop4IjI3lqC;FnK0yKYxK&!cW zvVCFnC^t^m#CeLz9GnlQ6#%3jSfFsEgudTX>@p3qHhLb6bFdSWEeIWs5P3yA-AP{< z%xA{W*rvE2BpERrKV)Jy>v70A~xTI=*r_k~FUn5a_xhpP#G5x2`CpWu!116K54 z;#d^C9qB$=upj9`zduhP%U1*^_Ty$+2Su#)Ue;#CM+C>0Jg4FpSf`x0I>oI69#P(D z(tRNg4XLi89wcSXR#u&~&qZCP{=ahM?vK_imidqzZVt(o*2(au<1c0|Z(HLlE=!3~ z-eqK()0yNM(wz;3=x>B`>5i+_3b!G!;i5+Cv8@_?2^iy+kUkxa%W&FoE`w@&v^rs2(*ygt9hKjR=80AUYtY0PtI8=+O!0O$ zfGQ4iF}RH1ynrt@;xW)cT-Mlo=$UNs+tzJQuf8oZ z7qpvm2O@C-`HBQ*J1W%B+F57hPDzAQU80Kh-|ib4incatpRp5L>G`ZU7M_8Y>K|z+ ztxw@)AnvrW7I5v28Q5z^*F1(~;zHwM2Bb`_%K{Z~15Uv;QL*c9YDdMYw)>}st2MTa zBil<0uvj$%-6y=~vhA+C=i-{~kWSCh6q{r`KO@aKCn5Ik2Utz=3*CCwMI3OHgeS-ret5ipj2Re$o z6m7~W4kWhehU?S8qIY%fMj1qD4SW!q4^l(OD`+Cyr=yschv4(xsHosk2?wbo_+;PQ zH{J2-&IZ5Dxx14>@d7IY-gm5G`zoqWMxIix0^|m->*n}+hbRtjPw@dGNd?_*DZN=J zL&@CEvA_v3mFh5WB8j%p6|tP8oM>)ZYy!m|WMJfnq$xBWz~@??S%p5+{i8R`GiGCG)>eVZMXlhRodWndrR8Lct;PsjviopSk#>1M%bqv<3%ya~i;|og z1EhDvfM;1poSc~WY#z8L55!}YBq9OgnwrMsj&;uWd34Dz3DD03l;}ENCdXvloZ&IQ zt0sAKf{FzZ^%<~!gDoF?Up27ckEyUMJMYX>Fw1wRkR%b=5B7 zB}qrz@;RYd@JKt==$>GmmAzv1WpoYH@=dhrq6%Kki|pjLz<34kUOgXLn=3T0TJYVw z$>2b#9rc1G$Hh&^_AM9?6I}wVRzNdz8;bJfeaf+k!q^S#C!H9l;KC@V z$~TxBmRTn3DJktZzX55C>D*X|LuwpW%7ks0NtNEGs+6vRL&gZ;TUK$`8G!K}${8L% zi#thos)yw6mLa&)8y>oj7KGUq*f{>=TQ1UY$@dwLv(6X1aEuQLht7`r3?@aO2}LG@ 
zl5c+elmTuq_ZB~kGsN(xNH;-4=?0}9@hMOPNss&_&5{(m5>W%g+Qbs^0l%0&q~d^> z@Fq5tt&m7`lu;oEszE$)>xHJ1yH6%oEm1a&cm2maTslc1go>@Zd(f`6^F#oVr|{7$tDdZ2F%OHy{8h+M zH$@Fe#gvB@aSI>(Bm2S7*noXI0L%-psC*-LMU7>S6|ZCP9bmJWpEk>?B#^aGoYCJJ zUvrMVv*VQpOe^;Q$^N&%Pi%b2paT}XKq#Gw*x~yZrXz4{L8QB9>3aA=YDDCNZt3z&?kKv;O~ES9)5YgX5a`hmePbIMeh z(w%`%FfV(*o{2ylr>lw+z17cKGKPdgtqfx?9ZOYsArjN$N3Kqmz-|_BDJc`Xo1H$L zn%)qSd#($7N5oGYc^c>*iouy6L=KCKNYYaG@|TW1E{toVn*(N4O=jo41(5fk8iCUX z$sRN3+y?{p^Fad+*l?2A;s9;tr@IGm6C(w5tnX1NHQeh0TbLDMzp?Tq=t%fR`a1FD zZ0$zZ&8nVa^QZU?@7B`UqEymPwG!UeyO98v?kaTRs*&!ldqF4(?3lDdJn0V$bpP!2 z2n2vYTRpLOBsdW6O=#RIlbqwkYR8(uxow)VX@O zOU)l8_y&?r#1@=DNp6n!|F8R6JOV^LHF##vpeLvL1+t?He-yT9nMS%}z%PjXV9!|h z@}q0p1(WP9&N#$i7os)o*h*BnI0*+GT> z%jHwwVRG#6i9kU_k|ipdH!o8VW_!|F26?mBg#~bGe;KbVb71_+ zvLON;(=GidS!v0t?7Sg`K_bbkBJzl`SdbOuMPv6bg*mE&CW+UdWIv&LKb(=>_TCVQ z=Ozd?fwX<;m`R#KzG2?Ozs*nAO6nh4AVxI<8}0lFS6LS_ya?}sDnX zxUMG=ZNaRYECZOy0w|;=ZWIc}(o*an)owwVHR%j(@dsx>d%m=&WO6_L!vyxnXLv`F zwKt8Wz4xg%l!3DJ98FE_F!qit)i5U06%DYLzf~($d;h6dSf1=jp}Yi=?bS>D531j{ zcxLonUqav~iLj*7BJ?O+j+;<618NpD?Qtj}_dEzMyOaB^7bu-?|F9B_jcAOZ!9N@o z=Mkr3d&vJEU4InTH$8Mjs*N=hCmBj$0febmA*PQ>VjS1$1lh&{Tx!_>9Oq>~arHU? 
zk?4nxvT^rZ_m@mv2jJcR8X#e;L^69`O#J$7g@bd0KK}E%AdT$xEkN@Eo}y50I`=a3 z;y9g5qnhx|>((!sVbuLnj{3vX11^HctB-FooEMjf)3+PG4Xx8cuq4JW?Y#Q*-(O)~ zz|M<1s!`j#Tpv}+_qWcAxVt6&Zp^_C^YZrLlh2Do{~nDPIWKiX(obTG&Ckp1MMtBm zweWd)bQE^yb>HVe4nUk2ZvCC)1LD7>3@-+>%6(pkHH^){*f3tqLlzu%DxVi~L?bub zw>IrwJg+>%t8ZSOTM_s)W1RE)%2@fB7jS*_gno@ouE{Jorc&HHxA^J=1QA{le`yly(Aog-H=f_Yiy z0cd`_2;a7!EKD}1ir8{?eqd-Yh`RI|rb~$Piu5o5?KWrztZ=I~FC6trh$ey5&9W*IkULUUqJJGtd%jbz)Z@syl#MB$TiKaAyfkC%*pT?{gbueco|M?raRTu4%fRd4eW zaol_3Ja)X&Y>-KsZe2j*DY~=o`cI1iCTXSm#F@+Lx7eQc9i01*Y@Uo}S%T49FkKPc@3)u=mb6&^U z-|xJBXq;Dou0GbpR!a}ycNEPl`yQ@>O*kkV0<;H&a#&$r7%GL-z(}fSeqK|N#e|jr zrw%V_x%a0tufAU(-;FyA2=5I`qc{Vf@v=GDw9u?op1)OlX*r>j8i0C^V~R6BuRVIm z6HS){&I{na2=LF)E-#BdBm)ylYNmBIz3uNkK*K&W=QTizuF9sEFzRgoXjA;GoU2h*=Ec16x zqz*loQ*mBk`vt+g#J#w%d9h|jBLHxic3!%M?Q#wq^U6)_qKft!DW|I$q>L+Q&`;{# z09qB+zo;C)czkXhnv98=~US7tz=u*H(w~nl|EOK5t|5TeRK(e7k zn_u5vQKogY_4nLy6eR^mtG`gmsjy0kfpt1k{W=U-g^uL$dj7SJk3ffTUZyolGkBirL$hLG@=Y4H z>Ad`D3!$7>E#6P<_`gHXp)Zg7uA|AMn#$-jpI3$Emu(}?@!w#?8iPy&R(pLZGq222 zOG3N}y5XQ>6Iv&b<#^wZQ!U9A+L~8oJCNvMOAfD`pu12Srel@0Ore zE*`AROvK3i?2YoF|CUCfju%oCSEsLJ#O8(EtV9cfZ4QFD&_iFY zF`ifA0?ztBJ6=XG%}K{gHgBvN5!HE>;%`xZya(`}vSdRrGmMwL{$5UCZ(B4ie7rtp zL{k`dwTZ02{d&*qNk>b%T6mh)woHp?UX#A4FfY$IA2zQq_${))t-D6dU(PGiljKnJ zcKnaAHNzkrg?X*(A^6X*vWX44uNpL78s9#);BRP!`9{PT2R$8COoI-?T-%JFvR%7z zb&7Ts$4k*eZ@RMhOl9LmiPvIZG?ngnjebAzp6~#-Rxa90npY0xz2Vm8%w`GQT zofRD6oEI;b6F;wLZXE3HdR~a&WtcH6-t5Y|cV|Cy|8~V`Ox^ynS=SwQ32|QAlV!F^ zk@{{7m0e1w$c@p@7=t;V7a1!Y03r%$G#c3$Khb7yJ)F_J9^8Xfofjq#q?Y!_-wxs4 zda2x~J@?IfabrZY@%;Q|7N}f~ZLYJ4KCjRm!BM{l2=m*#KGc_|@w{d}wX5;Uj!ofw zc8SNkdR`FW?v0ma95T!Z6!K{cD2i`5^XiL>!*&~c>14;~+nl_49TMJ*dS281P2#+2 zau$ew9ryDp{z)Qyhlrpj@{zQBYW?plm(=j)wV{nf&A}~|rnKT}G4d8k4{3?~2#9EC* zQ%pq~i)-L>kye^<4kLBf`r!N_^>ZVq}T zpUb>V_)94o!!gE7qFx2l0Qncvq@p&jQYSQJaj6RS)tc9ZKNdQ#Ok zFfZ!Cyj6YsH?KpI$*ipMpbxt-o)@GgjO?y3$4>)#Gcre`V)3&gJ>vzZ-M-{Am*-{p zwjV33fy+%c^Sbv{PUjVpf;81dn~RN?5x{=4?<=i&;rw2At}ghrVoxb12i^(%HfJZz 
zt7esAjmInO4!%O?oS9eHz4$>&2@fXaybL~Iy0&wgc@3xyH!W8So1K@ca|*BacO@!` zpfbN^R0t|}%qte(#EUZaKVSXI4AX#4yROBG&-RF#58)qG&I?@ZIO1gFziwgZnCB(G zficRV!{^1A89RiB#vA8ey?~K>-^rEED`{W)&CA11degN~_j#GU@S=G+4n${BX44p> zDixk#65vh?nXq7voOE7e!s$sdFJt^Zm=f!#J}>McvR(Iy)r@+nJ}=(qfs`JjOSlQQ z(^TdK-I}>5!Q;H5p#oF^51T(Gs7|iZdF>H(DZ+SVT#U^MMStfaJk{dM2BZ&K0?q5g zU(?efCU7gA7pW_5n3wN%-fPaxNKSjafP_hNhk8bb)Wk1n9X(1^X>?J4z=lZ=2)xg^ z<~0UwBE2t-s{$OKI4=Snh;&{Zg6wK>i05_Clbmw|EdSaAV@fxHkv#Bt-DQ|>fzIH{ z6&gv<<$|@pKvA{xD?^KUVNYIe9$e&xqQic$sNc6Jm`5j<0`e10)%=Bjlui+?<`uqA z`azshF%-V&=QTbq*-#Idn9uPN4zrnBsHHD~~`2 z^bpoZ37=hl#~|gr{tOG(2DPRyC}ns3MiY5qMR7 zX3w7I#rQj1q1JWhUYl5Oq+f?utpRGZnin!LE!*}DRia%}TTRFC{-Uj8{z z#yVs=$H0GHc^8N)b6#`vV`*6^YhGd^i3?dE5$tid)Whh-``;dddR$;$`Evk|hjqLj zk_A9ROND|8^vE>BfHBQfb;|RC@z*M9xO?@*wVKdLcj~H+e$22~@WQy1L9ei1F}|DhWq3+JNQj*=4g8{Mc*ZS@4;9%}+gLX9T4$_bI$3o8J3m=^ zySoOHt$neBOP_pMDZz(pGvooR&yZR7HjNfCs}+`Pu$DS=ZWk%pcXzljNG zWpwpL@2bX2mU&)?j#rGw#$ab;@f4v#uzh-7!`_ch7Y&isSEj5O1Cf6$=%~opC*O6V zCq1vW4=OjWR_HD{^eZ>7$yFF$&`p?FxYvGMiOG*wgk#5%kdK$)qSc4~DSBoMj|C3L zE2r`0m22?j^NNkRk6`nnfPmU7!>0hpIpbrbU6GN>WS%qjU^jB}I!q+>^Gf$iTwNl2 z7^8X72Q6w2fTl?1Wy4!{T-=F-ot{?%J_B@17{`m5;}UQ&4)^oou?&?=6PVp#yb~xf z@06{>pUjCT4vh(q_jQg-OOs}tkDAvxtbo%>ic^m3+X*~>^yVc(!7cbtZ{B%uwgC)vofpX#0XJKT zKW%J%kg(>?#g4HAe%o{jI_BKGwDW;tDG(M-Q-30m0EC0;yaq3(ic|S{H3Kp!y?Ft` z^=9mOJsx|z^!u4R!`+u;^RHy!Ol0QO&y-2?+Gxd}zD*X*opf^{PvQL6*B}#%aq8h% zsM|YRe)Tmg=cXFOyt4cwl%GI-ei|1r=)3}LsN9V@A=L}Nr(fdp7V^-yFawwtj-XKm z0XB>nKoS6G0HATN8fITEsx+@bjH(b6N60J+1fVdtrBb0HiM(T8nobciVA**kRV08A zVW+*Igy6l12-_U-Pe=zY6Z68?}Gr@^YZZuw|RA!O(HGqPcQgw9H4!5Ft@tt zi(z0kFM~P?+*x7z7v$>zg#kkh4h4|srTofH<^|kP((2ADPR$FhB8nUX-ofY9!f+N= zpaujbK(PsUUIer?#_-{Ufy38nULW>$&9TsSgKX8=u#YxFPxPB&qY==)0dz)IeH7hh^^m}@GS<6x zFq#86jkZIy%l$n1u>>a-*}I6-uB54}864f|s__Um{P)D$m7MGFT!u`F*61T7a|-IN z@0jN$mt4;}ZPf9xi(rWhgOO!MUr(@W5!P()+^COwRqWr=rrqGJEVMunQa;<>#)w<< zu%|)wLW6Aq$S`Efqz7ADksxCs>d|3!q_z0*QxTlv_6d81ZqnKYnDtpl#g zv`@>J?qy30kb^zd4-^k0d>g$4p2c3p-{_nLDaj2l_CUGO^VL_isYErd0N{k8WB}D9 
z^-SekQf1pi(FoQn75TW5Bw?usgv4*S7OGE@Dk1z8I&umwjk2o$H<5x{i-tKvY%Ad5b&2OW$*bCRH%Xn z-29lQ`ndgaCWH$wkok?G5<4kWq?$uS{p3+u-EQ*M+f%($ zAkUQH#;6=NNr7!*h-E)(_pdivKU)lj)R@ z8iup8PwmXlv1pJ8l*o-WQJLhjKwO?(q1C)(hG=9df|Slzd(k6 zW?UTJa5SWm>`;bI&UiO;*QnQd{kCM&1F|jwqt`AS?y?-IoKs2}w zP2|=O(L?1f{^NB9Iw=c7gp@Klm4{RrYaeY1;34IRDaAap*!MAoaUbzs*7(yjoI;8c zb;`#h&_9u&f6OY_>@iC>f=}K1i#fsso#8(O6OEk@-J05zR#(1yNIHclwNyb=!~yif zlY0kYmvgq#x(YM~eKA6WVih7c6k|P8`3RMOAQUEFH|HkSiV+3`9ie>ZzY_Ik&tu*A zOTWbQJS(NTsFdgAbKxEihOxHqPzU(Q40ClXC`G@8H&|egG|wpI4i# zsNJVuDQ-B=;{RviA9Si-W;vyX21NZ^v@d{C5z3?2VAtIsw_W@1AlQ8wi8=uNzW`}F zJr?zpUhv`R9>jX$26CyLTDGxJhNa74)A7o|mDD}gM2@~SYgqpFH~|0Fin89VNQh;X zBh$zFiVk09|Ixa<%yOOZ6wC_P_<`JXLAnxY zDO1q&B{RZ>560L0%w|bU&cRNf+)G%IJ~hnH=&(rfgIR266>}h;vMhr#Q423W55kpU z1*S=mJ@x1z;t9YTs}!Xvhq^=!IIFxjD`zCf`X~WWx#;#xTJY4yo0J`>+UeA<1=GvZ7kd%Qd@qhV~-D5 z9T48JaBPi54YZGZ@^uR(&lRv@>to+?-Y0@##eCo+Wrcqz?4MdEK!!5d#`P~3z2Ly@ z{#q#l%vV-vSw$#d?J; zbe|HQ3#07Y0N;nkEsH;hr|qDfYOagM{FcMHX?{NdALXM*x+S2Qe~@E0Gyr0WSDk(a zP@I(tr!-%#H~Th!o5b~g>|Z-aAOiVqySO=Bq*cniiE|^wTMSiMgvBXedNVeTX?Lq& zb)0KNCG}v63RcU=o0JMiF10nEgPq6W{dviCI&}a)4_t* zA^`Oe&9*_mP&ue*7_v-1ow{c*+Ou^4a@n1P8K?#d$$9YB2Q1aHC$Xn7Bo>ZfauK`A zB99=#)qgpQc0VqNgb$8rbTc&PxI)3z(h-LQGCaikG|~Hc81a6b|HJO^G25%!glO*3N9v@*7Gpr9m^q7(^@ZD%qalM2B0Aox7ePvYeJLLgOx;&pd%6`kmm z1y@x^l`soV2zff`RKq(I9Y;~Bg%=2vIe0^W>L~jmY0B-eaWY2UL&7k(nXePj6YnsQ z5`29tMe_q~DOCV0h(!+aCOuqQN4ZQuqmv>A#?8dIZq`q#d2Irtc^_&qnx=;p$kWi{qIbEN#ZDK!OsyXLTcoFBr27kG`s0r84a!3l)B=v z=YUUozb5TUdjX2Bz{1!AmO(ynH3O)IZC!&mT>vjY(7#U|nL&~SN*3fG?sRzOX}Me* zVjrZ&7Sv(<9`V(nGx+=?~;J>^N5JWHLH_X z6g7(;YKClJ7iJV4T~MhxQrRj+3PIh?{Mb#Vkm41f9}284Pc)+p!7a-Nbc@=BNkLrN zpTfKI>mOO&Sort*AdiM(eY^pdY6;(VLgt_VEA`FePgFzRBdAcgJ4?^!OIDygSgiC#<~7SA(Byr z+~g5o;UaLN5den7?V=4!AwIs;R_*)dk&1SjlQZN;DW4(WE*JwmXK(< zBM+k1KxNHHgD{hTLIxCS*(-}Dds$S)EO@=gQ4P5?>0bQ*mK_6;a6t)O0^%WXi%HBJ zKj`5lE4A=;v8wjw`LcMv#Nbg{Bxb=E`DB6*vjt1*v4A(+(wnP+e+5*)V! 
zszj1cDWx8ayW5c{(iftr3Odq#MO}?VEPpP7tpoui<_B(<&0B=jPaaC7h8Q-%KBXCy zjjnhpha|%k3*?-jpvG}{7nJ4pkQsI|A~vF$7Wjz45uKtK|In|z+0mGzUCL7EK;oR? zMH1VRh6$wMPS4_U!6Gm!dzF>Gq6ocSaX1#Z+)ZfyvRHq7A!`H&3XRf=0w%X2>RnXk zAZdEW0I#EDGzKmK%Hmf*_zP%pl9}V=L&L9OL)45n$yMxuhhrfF5I9Pe&pWP?7|gb! zVYMMhR<#b~U2!#xaiJ=uihO000J2KHqVcCp7(B4tgkTJc6FLRXij@jyq_Ji^2xa=4 zf%1Qi%404|tBf2*;_0F9yD8zz(jNwK2*>sdccBzxMqc%1LSOT4wG2d%ieU?SS;;i% zYyO(pcexF>3K@?%#SJzu5!gV+UY z-Y}VyiC<7%4j8E~V;&bSgHl4)1|dg!v|9y|y7Wj*86>kfk_l>px0Hlc9|#Jdvstx+ zxqctNIez2p3_qJ<9ucVg_tuM(RRuQ;AxT31%tUG4!qG)LFKW`3!zK+Pqwqio4G#PP z??oP1Zi2%nMSr1|#AEJD8{`@kd5UGA4G^UTTLaS+p&1+BJsTm@gNW0lfi<)BdMZWc zP%`sQMEZ;X>nTCmF9D~LuKFmcpEH!;TqhyrJdFYDBte*tPx=loYOH*97p(bOrF149^a6QCeEfFcZywTS$PrXn$E_RWYRwG-~sDG}bme zWV0}ueJ*zpfYT2vMT@*gUTqNYhYI=SP(d%QBWErk6&g|Vp%Mf?NYnS)GM9A*SG`76 z*T$L>qJYS!qm#Y~Go*@MARJ;~x?x%yX6EfM?Vq8!;?mZiW%3xo{wmu7JWDF!xt?B6 zKk~ApUse4WnKVLAJgLb%*FMC+k~$U#mzk`#A8Ul40g&qX$R0yUfm>Nj{+R_p2GUhG zj{H9SJFZmsZSZDH2n>!>;>Wm2G*Q!7GMSk(xg0Ou7Fg-jn|0&lF~T4PPBZMw-UMvQ zuFP)|>x-w`Jy{zL#qDqu44vYm*HarAVFNnEWLYuh;u*HFw8h~*QS z0qtv;}eXby)oyc;P#IQMM(K%>1J$D0`%GmV)AmLm~L-?I7 zWi*31R#J1?&TzPM9K>X$In2|>D#5M&)S8ZKrZ$3lYO85PY7VDvPhG>QIe{^NiVDfN zI7stJy&XS*{i%yxc07f&j&>e0Xx^FepMe2}g#I&ijO5Y)O^CtCxgn84ji8?-y7J5I zrX_3l4d!omUz795dSz;8b0!ZPjye{02<$^>Tz5Cj-mXj8{n^BYwn6@VM!zl;S^kcP zEz4O>C3L|fwhsON8z~4Ejt^BFZosSdODYM)bt-!#6(?xO8TcmDB}g8A{`YWQ+5j4_ zoO5w2!WwCkv7$Y2-8=G}cX_*$@hq(7yuL%Uo(lJT1xy)jk@QIUxLgPTA1l_;A&m|w z{oR*_bPmi<*y8dH6K=u;>~X>aw|OUvhSZ8LQqvRKRpBu3{eFctf;J{->?buyv^+5> z=RjuqpN~TPcRVDpSRLO^#!@zaL@6kMr-qcE6L1buHyZqHXI9bMwAU!i_t=-)Uo3Lov68&#N!2|JYRPH6#uS^aE6ybU@* z+(bM@7B;X4DL_Z(o6l1SZdh8;{aw7M6eza*@?kuuv9prJq%6=J7}`+gtCKLOLr!~*LR zP@ulU1=eX>b5h-_LOcJHv|D1GrY>+T?)8cP@ut$J2Ta!bsFl~#r?GyJRUbcJKy}oW zC^AU{!a@Q)^BFKFbLvs%U!~GsU{LjKsRD(CT>{=t~qQQV{>5sZcl6;TpJT~L4|2=*W+T#B17Ri$? 
zq@V;3u-6L3Y4a2pGr<_>+RyO7Rf1rHmfTU~tRL??SVPQM=qLL7ROgEY-4lr(DsWwo z+=m1TD9evHJsu!Gk7j+HB=jKh$tV6{Sat%Zaz7zd#olg@GQN1KXjHSoY@zR1SN}Al ztiRDAc-C<`>;OdmB-l2DQ)zw~Lz0>HE2l(+%D>|=@Qm7AJ1DUh;lVL)!|z+99lfIj z^$Y5;b{?v=VS6W6veV9M;}A6-()ta8^g$c6p%gWTudeBboM$rY7$f^=jux&xhX6_s3;)wlz+&4n``|Df(>hOg3 zul5qm-NhHh?_>gp@{9t`?ijgA?7vg1Hl+J0iZ8huDDV$aE>ut{y?^1?9);K8mQZ84 z)B@F}L;!T3Z3ZU#uTX#7&s@{~g-hX%Pi;$U0X|;Vp=AO{31JUi7XYh>FAM!UZpg-W zr}3|Fdo}T2A1b#5#_qQzZeu0j|5^-lk*i2P5e2?m2+Tj?0HCxT$YhlngJvva&WV)` zxK5uK0Ca6!2`vt|Dg4*J7j`Yq0<&Z2dq(<@QcHi+!J=}lxZaFQ6=h(z7o6^>jJ>HP zU^X+LhJ;)!`@yW#i$DWe`C6e0+1>yfc0E~HgjT_e7xj6#?3FtWy)EWz*m#l zX9i$-Am6Xa3~aRGKm*oB1i$XFgl`=a#SHHyzGmrrQ6L zA54w5MY0hrxhc%ZTacR8B{)k(G++KV$}07^eBtp66c4GFyI32_`zjIma{X|~mH6hf z*_mOeU%}Rur4o)jUi&U<=M{*(Im}4LLwNniXpnxPjNcfma}EWA(cn+t#T}X1?R$|_ z7%4A$Uw5zp{-k2&6QRj8|49wvHSHrdlqCHY!&X!toA8;5+<94E*7PWPX;$cRh#P73 zi4OPZEE@o+@oY0%5)re|QSd0`S3mIYx-be@R*i=N3I9L51C#d_jG#h)5KopqBMMpk zluWQBKU8;-QbcYp@Lv}Y-uh?kO0}n$6af(B1KQPo!)0p`Pz*i!?zWTQ;K}-{_nOp(>MtFyFPc9j7y#jJ8#Xp;@j5pjD| z(xfwE%9zKe;3ImAz@~TtH6-pQuHj#sCc2%=9hH=VGDZohr)W!c#nNJ7IgCwd^+%6g zoXaN`ib13xewZuQa&TS-%eYUpr}tE~h%9;~LdwX6Yk^=p+*-Jj{{Mn(27KbH1Dnm- z2h<`{_&&|B>q@655v9SF*Pn=7V5$jd3{vE60NngNm>WQzTC3Nxk@UDM7#25Osko*C zlq~%-Z@E=S~Jxk zVVN_I)u~b$_mQ8I)RMR192#3xtj&d71pZ207Hcp z?r4c2y^;v=$fz;>*D*rE} z32jxN4B>7&@pK%AJ}`x#c|-;>?SP*lCwVw@t(bvg6{gfmaDQurT)?Ozy{Hvc@y@Jn zM9F_XMi?Z!UHd~>hgMJglwI99{%3gf#sG}NP6;zGvkn~gA>(6FLb;ibN_jLubfu&} zOOny_APJ8AkFOSu1<(a8RWzw;I8>FxV_C!jlJ)lcZB#~7Qi+6JP;lTk zLWUDsr0V_*1r5{6_PAR>y4kmPQzIHA%c$y}O40n$h}Tw4#!Wh>AN1FM_z{hHxsU$b zO2s9dmys{axwELsq&OF|K&BqvjKXT{JN#gQa_MsQp$xP5D(9r!fDPr!aN4dfC&z7=;dYB%9+_>5)+tNRdYkjL38PY!%J~|K+jD39vGi zy_y0KQjRyrl@C$lN?E+hhR_cov@^D$COy@%UD<7?+j}_12ino*X)xkeG&v46_#Dpq zMX^%U41+BZt*OeJE>Un86|@_P)Qqnf69SSeE}`$P1hqv@;EB6vCb-JKe}<8JAb{*f zd}8tWuzjFxkV2cDa>`?~>00wl9Gxsuj>J+c36A*(%J38o>!^_r;^9vA1JnZPFspmS z1Bn>AQGEibiXx@ufXee7Gh^08aCwHAeX-wYzN-?p;zF0G%CsTo4e*{th;B=WE$X3M 
zp(QnFXTe*fbc7;TjPcwhNA%yD2BUUXr!5?5XfJmsR65--j*BkInimX0YP8DUklQuq zK7C}a8%3A2shz;Iz%#98j{<^vqu@Tl2)h@l+a^!cV??J308S=Rg}`4iOSm-0({wGS z!tjN4We;hC4RiV2o2>7ts*ttH)PXUv{Sr~{4M38fLe^y4Fd9w0(rOx^B~t>dUiuPnid)>ORI&~1ym zHD|xuOyH}@1`=qW#I12mvW)b{TJ`hrvtd_tFvVHYgRhx1_k+FTi9xWYiNx8r%|IG$ zd1SzPNQ%$hr{gNi2@J_3;Gb)7hh;0jz+xNe92Wm@lMyM|oQ=tO%@F8GJiiGER~1bb zI~+VO@%f!Nnt=GTw{EAht?wFh>D6Qnbk@aKguSe$6ClVp2a(nQ+3SA0zrOl0N6yDC z^@}9gGqhtRL~-tbF3uUfQC`RI*LV$svP$dlu2M*mYm9G)2wjm6zDy#-0wp*>NdD5C zcV)dF8UH0?E7{Hz#0#`oG8un>Kk|hhtnqP_f43>dqOMx@4)vPK?%DoBCc~-B439h|ylL zXXYjPxINw?w!lh&CF{EYSq9fLkHq&|U}>!1Em9!ypw~$h$_MyvOCS0ei zMTUk|-9Qz{A4=`1f3b}$q~4^uZ+^V+lz2449sr(JzLuErHuW!L5H3VNT66P50I}IT z)gO2;j1B}O?@#WkeT#3A(Ex&7(~wSqt5;_44DS-+B_@9c8V*R(@XI3^{`4TpVC8bq zcV-(iX~LAX(1Fu;k)aCvSt~mzS1{VgR(&b-5ja}|}@#nvLS zqP3Uaucr(BOo?|PM$1oJAqX;!rjE8S3SlRa-e>tsQFu;jbd9%D+i{5R|53Twy`0ji>3AX4^5*tCjrs$WT12;I4GBgSS6Kob6}0(XzV9oIN! z1bOxfV(sPzy+A^MCd;M>CgQ_vce1VMd$29PB-xtWT+6R~k5vQ1x>~r(w5+dtl~IflNj;$4iJ=( zh4^a3RFr6ck55>r)y1$Xf+Bq51wB7PmBIUa^)c|Q7J{*Iu>k+%c#~!<8s{#ohb5<# z6${4!&0m+y*H;(R0+RJqkGCnu{86vAu-SlTW0n&I%mAZNwl3wj&h`!Y@-$>+rn(2{ zdjww%+~aNWWx|n1|28}X_TGurrHfC}z1}9;MUZ0M652AQcep}{7zj0j$h<|$b-TRS z{pp2ahg=e~Vlj>D3@bny$=4Ec>xZ9*?veDEz?Ytv`N-=^pw?*A#aE}s(M8ND3r=I_ zCz7`tvY0bSlsomy#>zmmbnLhAF##0%NXoxr_q*w-_SiJaH2{404=SLh^ zu%;1fi4R$RE?7hh@B3sscXcg8hj~_QYaZ zVac`~iV+JZwyvh^29_m~;^ey&g7rRXNeHOJXn)<0O51X#X7FZ&;ojk?tKPNY3|l;z z1f#j1s9T)ldX?>$td%z z%pp&8=W(v$jN#v3Hsc-Y4C2TZqd&dZaYizL?3|f_r}`PR?TUgzudaTZ5fB3$ND^dF z$QWbgz~Ly%%Co@qan^Hvy&xE6-;>*0k0?J)aJmzeqH|6X*^qMA!V1b;Ud4;eU29dT zWIdsqZZ^u!R<3vwB;!7OOUHcN=;Rd!2(9Jzpf;3fl(DXQNd=hnIn$#@BRsAGzx;2s zS`7`t4nX?n?Z_c_eV<6W9+}y~OYEbwdgdrMYCx2uut}f+yBcGmz8f}0%0Mn1WLhQ) z%~o>WEI;is42-Zi`oy4f$Ya`E?x&4(f0_=5(x8WF1~M|V%tKXS4n<95FYR7TYNCWV zO8t%b1rVY?d~%X2t*i`}f4;=0zUx@W8n<7MFgdJvWeR%}qx7rt-ODcW=oO!Tm1EVp z8fRGVBs51l5EXXCc?@6Y?#}j!BvdHA298600)Oa3s~je_zbezF+2n3-u(L?o9bw5X ziU`_y8I#Z1EgeTbP7!cxehc6M<`zltQ-lXw5hd4%c4E;PTHGNWM7K2+0YJLbka4NP 
z%oxW|v(hk$`BvsN{ht?}$RnlX#+t&(aM7&(Gx=SIjob}MblE&`>?jM{&&!nKT^Y(SN zGtF?M0dnrBd6QO0uD_%p`>yCoA!KMQGghMEb}t>5s!V606L!6EDu*i>Ih7Nk;v|Dm z(gXMi4>ryr@K*uBU^jJj6~tGbRCh>YdT2O7$CD$0&VScMDno%5TG!y|h!Z z6+b=jYu!}4bPR@g%&jOa(Wsol=;AHdf1w;4 zJ%V;dqsM1Bt3Yi7|CkSXimXTEw>pO1S3cvM60b+r5Pm$gj7NP0UXE9j*G2+>{@p8g zxYiQd&JGI~8D+F@w99%wrY`e}O4ck}qz1!`w&4#|wtzSp7x2g!PHEj~$>4z~LD%Z{ zL0>;y0gxJ0E9B|wM65T$l|<~M72hBT8B#HFdh^*W#q4{GYxB%lDJbZJU?bUAlv7ms zXZ5uDjJay4fK%`N{3$9Z?C(+^->?uMo<6@FpI|@omXqxQgEJB}&UDlBdfGBjDe$zlhofM6+;{(T1e(js;1)cQ1D{8};ST5DghkxAtqP!L|LFYHb5rFqKkya#w620w z3!#ytmP=-zW2)547Li+$P1s$=IWoQ5pl6p132TkkLtYQbIU?*Kt4LCD`i<^%p0vWc zUC2SIDF|}qJkB4qs70_!hJo_gqonD(g^j9ZusX^JHtCWOR|Qt%LJ;Ge!nn;KEukec zEYL(2e`M>l5P-s+kqOsv1{5#E5mn4Onoh0)=Z2@zdR#)+iMlK*T`>#@tfXpdVsf|c zDOL!H@hpVw(8$YVL z9ayqa?hTRycdV;Hf4SD5dk4Y(ePIl7+p{h;>I+n zg(hq`Qh&-q8|?Th>2XdtbYUTo8;j|^z1=C+(nr0!a6Nco+Y)oFvTFN5Obu#4>MprB zJz@eTlrc3&OFiI`U_KY<|F&zgMkD>YWFBN%u9>hA&6il{?1hz{SivoQ_lXQ#tU*K2sVv$t~0VKwj9jkj$8^;)^W>XTE zItIl=<}*SPy+4?q*U}0bm~7DWv3^duIr(tvbIowq=XKu^!0e$xB481wS^$-@MNlgK zkabjED-sSR^i#CaSvb{1)QZY3VV+*~qouv@t*Fe{{eMINuQ~ytC=0)@Rz~otp3nIaDp2R}UmS@&C0?cWdL%JGs%}Z~w2ZZ8UMo9Hk$^@m_x)*7CgTy=1rwdvU@Rs{H#Ap@ zv~vNr-d-ih(_{=!`NGa0u`Zdps9|jWiN&cqWYpZ7Uc|OdK?pN zuxy$rAK~%^^7B6?5*+9_z^KG)xO0tRJOX9*;OGd0kf{@-R9wdyv~A0^P9{8d&?I3H zQHc8VguC!)PHOfP+xf!ie$`rv)$SDAS5b@p+dyL8x?h^~V@fhI%&;5M5+w&>;n637 zQXh$j8F{7vf)^%lw#1EI^SI}g3AKZ85UbVh`mOW9_0z6plG-&fS$;);qx-d8o8vY& zQBb210?prGP6>jQ#%Go7_|0c;Dm+Q~T|9VNe7TK| zLN=Q3DiCI>?pbbZRS<7+wp)GB>jqLV!j~Q@Y{8)0l0Aw_e6ek;6fLX63zo-8ci1vW zI0MCgTAx8J+Ip`Ju*cq*6K8_)8v@55`)!94>+R&PN+~^rZLyqte*><2v{aNjJTo8q zuHU@oD%^^}+AC{xm$T&J5X_C3VSHC03$Y&zfv)yyA)raGQ)9g*(~9JbJ(_P#iD8-U zj5`xFER$mI&%spN2|Xm{%P_CxoH}$3y(0W!GqoH&)dBPm9MYs{mzRvwHs(4=zpE7Va@b9FpifxgFEP(!5M$GNS8^?{G5T`$fBJdpg>N8hChk*qAvZ zOcSJ&8{Z#ms5AO-Qu~ZrxVo9jqJxsmJ^0uPB}jrV2ht23MT3ZL!Vhz3vlj)XyjM<> zPh1E>CHzIMW_n2KK@l}0v|yFSOa)i>qA+C3+g&m_rAubVY7c3vP6*GTFHa4UcT*?T zPc4`CIFP`iNP+M`6;@0zf~XSgc;dhZKjeQ7(NH;h1wDatslO0hAR>&$K3C55{2EgA 
z9;{*bx~U1NWm!&)a3WGZxGSAyJ5H8MZBL5emFPI8LX9{PO*GK%J!tboos{@hcv$JI z_D(^KpLjGIDlfc0>Mj-`s`2~3xKuJMHx`qsu|ieNbwuKnCYT)Vl3k+jeb_ih#m-|F zhy)^n{)~ZQ2Mf4qFqz~09&eUYmT2UDEO5aWHxKV}Sv3{w7W#tV$SO_N=ngGUl8`-7 zps*50nG01;ygwL!fl-=Rq+4(063DhmY3X=cX3;c<* zBp!7XO0kXGj`>;!DBgGqW+%*HZ~^|3J~^%rv`}&)ALfu*r-N5lf8}K~05fKUxiS%^ zGDS}jCxHwfJQ;fxTTmynIQ!dM?V@a>6tR?rd6Z!F)MUz`OxH$!(~G50O@j<~PSOZwcoFoMH_~$sQ&bce zX(-1iB|2UfBgrS#3Ks~hF1}2KC8i4%<3?O@2z)(Xn14j_H?ArJ%SwQ(OS*QcVQ))u zD(gg$IlW}iO`HS_LQ4wpoEdGoOBDA(1Zf_Iy7AcNMG&6No0hs=PcVrz+K5ILEz=%B z<`)S|@87Vma=i`%YxenttHkPV!QfPm_HsuC`WKQ6m`Ae(h6pPLzdJDZ#{3e_*x7_I zMCH6rDHMv75RBQLS8(7wr(`v36-fj8T&pbOHrx0*l|7yyjXcl#N-5)A$LbA)+{rLY zAyEDl<^LE|p#Jrzm*j9H(pKdu!4N=hl z8R!Jhotv2PV61o=1rv%LfD|YXMb;d}bZTTSWDMg^F!AX2+z5|zT0A>^0b_fYK&vM% z$A=y7?^-z5*NJBQl*vhrGb+f&ke7^XdAUS$GNq|j+hYlHKqZ;`HlM%NSY%{_N7Sr6}Hu{q{mMe}U))LX9j?#Y;;EF1sRjabPJNV8OSK9`N`h*P#}QOgcS;k_jy z^Svs@>9P?&0UI5Os+KJa+_$PCLF(vbcfpI5Rxkqb3RZzEqv7wv*Jd(m?gxY3coO?1 z?@>ji${hjyIg4B4VT@}rKRdG_h#drL3P@^}N06n(^vGdFy&fv{$5V$T0PLFlmfa5; zChxmvI~IkpT7@M^FKz`OASUg%(!?bMAbUsm!enVtg$X3_qy4@5chSc6fsX`-LZsC& zw5^nNc*MmrT$$Tiw42~U>NH=0F%0kvI<^3i_;T+iDN^yaEJ&^qz3y=i%7x1k^-wuc z0g&q>+y}O;IU!jtrcdclEVig2-D>Gmh|H#$HmlUgf@Da8qM24;lvT)llTw?Flnqh% z>?qds*n=0!nwl?^UmE92FnR-N++Kx5tX%(;`VA`XBpM_bviSDOK z?h3xgxY2tlc`i3{j#_!z+~B9K7KBQn|Cf1h!o%t3mKvzVQyJjBy0Bt5;dOIRL29Vt zQ&R7M0=w<<5~sYy(tR@Z_Gpa{lQ}(R(NKK0)&3FD1ux>j{-zxvml-qWe1rdoh3G?) 
zGQdgbchH|7l^WIu{;?0R%iYTsG1g|D?w&r~-O)BB@UXDQcqf zf60MpqOfv9Fa$C6at<%6u%MO8L+uKaO%}!yf3hGBSsFl>RsdzcUy{}({MliR7ZfPh znBVMd`T#%3{N=5kKukP5XOuW2h(Ib3){_VX?g+U_vfpp`J7u|e&F@55QhH5u*h>q# zG}W0}VJAu9{y`qQF>B}6ye-Qt21p_2$R?nI?o%m--Bwz(z^J=yW56aEV3L}%d33x; z>F|vxJhJ>db?qXX4K8|>X16oW8s>*s&ZTDdRt~yoMP(aBF9NptM|*duMm;HkpEHzr z$vZGNK+52~31f>38H5!Rt;CI-tZ^wc3v26B`|ELA z=zP3yA?GmVaZv-4wT`F2G4H}bGE$esuw1W1t#K@i-iJ;QJCIX?D|41N^{MI>4OGI# zID~g)SsM{BM!GGN@n6OUhLMkzpsxGYjqj1A3zGcdONA1S5vYQNA zotHOGnheKp+~`A>+^^OBVHv^%v~P+X{2M^lYB>zWHP$`P05~ii!q{?q^QhtTk!=;5 z4@FPm6p5Ok@n53ml=~72s!7EecV=uP=f;K5rt*Fe0fzjm3i--!^v5F1yZHhh6>ERc zP@&Y)T7Vc$A_Fub69!@hw+MvY;t&4!V~W9<8^UwUK-qM2A8QU1gxG0XAJq%r2!0HC z3fe`Wi$Au^E%*{IoFlSXh*4TpR}$A_IL9VWc)!K;(oRB1f?=XobfW1wTbbE#?hE=s zs9iM54zGOTK8EJ)BLHe6K61f#G~(yCs#4UCcO~st!Z&zHrGqEZ7(Cv$vJ5TDK_OyJ$EH;%z{~*~P0lL@W%@yT(I!UB;Ne3n|bYqGQT9u%@ z;ieXtzo*3)&|~HDLRQ^<8g%Pi%Pq9OuT3wCU<>(d;h1s$^&yFy_B|sK2G#9=Qt56!bPR%{4EVYZkyi4Oc`PWvxQ@w4QmiA1Sx1zPW$3E4dgTL(SON>7Zit&D zLV~;_KOor+p9~wDtu=2z7kJ6~6vN8;d4bA~flesIH4kmcB2HPe~O%s4!>l1;i# zW4RH+@WKqbg08kiN*e=uAibq+iyRdS@TQib9x&%46;8pck4k%Z>{{E$ z0x$HDT9o~uOg?@bH+SL4@%~j8(a4f5_IVD2@v_hDa{ic9yzakPyDt>v?1Ay-?Yo8I zI=&yB_**7YU7>EpuBZ#o66_1Iza0d`1ivq>Yr$LelWnOjSfoBs9{$S`C*~&(@y=E6 z>-x}PllCtDk2%?j*v%zmmOi%>#tD$T82=X&LC7R*Yre_#;utSuF+d(0hJgCV^N&9F8&EoM*xt0e?Ao#x`Eg7I!|DL*vG=*^duNA^8PM!_io!yuyh* z$z2(+IDe5!^qwkkOjQAkphb$Dc<290!Bl$5hLM9qpH}Pa-Cy-96jzsR{c4(kmEWWJ z)F4-}Qm>BW@IHev>s8EP_8E8yk{8Zg7bIY`-UaqCby&JI_gq@xvvZ7rbchG2^SWxC_={`slfnY3qJ4l>N(51mm{T?aro^1XlOudS zBPXV*s&6<|LkMIyitj_KfZ? 
z$ysOVNJUaF2H;j33|#Amnk?m-gnmRMz`uvca&DW(RQVlDb@!KWctBY1wu1Dk?4K}m z;}X>%;FA%6Fb3jhrT?GRhJ-IA1PfP8VWCD`@}rYQc%ryvm=j!pa` znVR}-gcB$MSX7V!Tw!GW?C*}p_02wbtDB^d<5z>ATrmXN2I@eWK%?`7qbW0B8>=ts zp-{I!NN1kp>^w+dtY42Gu{WVSNVGwhQAu2|@~sT&06I)F%YhO|rNYE7ITmxckId{_ z_`d}R3V0D1mdQ)2bpqH|=D<81A&>_M<9&)Y?v%QGESP;63=`P-=Eoys?xxL7mJqQ~ zr&!E~Xg?*@x9MO{=XA_DqVIG#>zm5S2CVmrX*o-1i;MvJYq2Cm+>e{M8%QvhiID2b zEp|tju8OAf#e`bKTPP`C8J&d8Kv!$_e)RP_!qpV(0@^F04i^Hq1Ph3U%8AP{d=2VS zw@8LP5%!s`E*B#Av=0Zu8_5n=FZ*IeIOd#{P!2db)SeG}jBiZlE&zXgV5~PpdNm*c zM=CoD=y+{Y4F|Y?^aDcNo=~G0uZ!Jo1y*e&Z|EzIq{hjnAW{y3N$Av8o?x3(7=KNC&;gz<832(R7=&j4q=Dr>P{h)y9ZW&dB$mojTDSZ;Nx}obc5t7ScJyo5 zO3D7wg27NLJ!xHi@RPg#buGpOe`du~!e+#9_{>|#P1k3??dYj4&92J@;hhGjgzdfI zZ7afCl*XH%0%lmb4MlAdOah%Dw9&U~VAUO}=vH)%RfFA_W? znomlbDKjv4?UL?F(nS~M0B1w`Wb->v0Kvp>UZd(f*!ncd{$fb^1KC^$@9>7s_Hz}P zC|vrT83sjY@#c*p>FCH`EsAB0dP+-B4lSCWBwm0RHTvuD7_+P{IqJv|Vm!eXOCDRA zimPb;f=vchtsYcjoB&P%R~Fx~D_TsQ$@UnGGWCi1BBQnpGY-l6?;I26 zR44|8X$A=!Zk%L#4!Gp7zbESGhiCYA!HF`^Pq~|3u`6RPG>gjn#qAdR#heXOrvMV5 zst%0{&dS8VJ~=GmIud{be7VNb*L+qn$Jf>@1~G-1rJV;HzgE5@d-Cm6#O;HY4O!Y` z(A}am$sx@Co|=%DS4Z(kA{7~;?f{Vm6uDiO@F;pivba7R;$UM50Wl@`pK{b{2-=|z zfMsibTof>f2#hhdeF9qoI!ZzZOEJr`DGyS>M%|V>q>nc zyiQZYm?+Byw~rx9|y^`Z!AS~;34no^b+N#mdaR>hXw7Vz%z-k|87bA}i!bt)uBcUHBa6IKpPZjn!k~2?Z?-A67--uE} z6cSGJzS0w?-JA>(J`~1b%oB(ATdXQ9V`xrj5!OD#)NDr$4jp2Hf{25fKZx;>Zt%}qT)7{IxF|Fxj$O?>jC3<{GL(N1I#oZ?zpd2%L|| z0sJ3EPUS{_sO1xrak%ikfj+foeB5kQ3G&REJm$)rfB^SQ`_Vz@?rbLh^}BYD$R{*> z(tS)-j{?ms$rXvH3$|hj4{%bjXSik~XW*=3U3?s2*?Cjes~$7xY)DZ@evM(-29Z$3 z9&AS(=&8hWZ&q&6fs)5O72=OBcN9q1DP85ctd{bUa?0?_dZ+o_K2EFgZo}C%zm;j1 z2;Q6tJAo`5DBflHWH&d{))r!mk9vO`f9w{~OZ zs#XW_$3Dt_t%C$Zj8G2WL?;?;uN9}k_t^+ls>d% z%K=I+OSXSi3FBT=c?DC+J;p`zJr}2~k?^M#9@^`)J(!{|c3OIJ{EgriOrLA4d{}R& z|Hm@P-vfoVkQ8t=T&xAt*gr?8d9ZQ3t_EhOe{<$R98O_ecBJmQ-&rw8A@i4hr;;ym z8@ha*gtvTqqo_7aep-c_$@CECc8d?^xbj+w0=k_SB?Zw?etaA~4FOC)Zw<%(aYY^##qnU0 z4OH;)jgf@=iiPkpnTe6J6T)Gf1#{TkAjf%I7_l>0fFl_CuVFIkcnD$7snY~NzyYaE 
zX6i_*&i@RuShn@wQaPfHlK{FTGPwgQ0@TO_+J4PID*#1=Y#yX|MsBub%YMaLQk$=E z=HoL<5ki9Q_a{qsLIRrNW@B{t>1O)CaYTR`pXHvv8$jR9bel`(438e)xHS#o$@D2AIkI3qXi&|Nl39Y*}j+{J>&A>TLv6oSfFdEi%% z=uH;**CulF+JQE#A0Z9BZrQR48j*ByYy77f_p1rJ9D2?dLJl+G{f5-3w~9ktJA&+j z4IM=>8oMB8E+6E1{GnrZy<-czAhn2=UED9D6pcz?7j4+@upPXzyjgZpj|fvg$S&Cf z?0Vq8OsGx>(MU+XcqK9hVI4gar>}u}%-JxG?DFXqva22Ma;Gtt%9UMDU{@5G8T@o8 zQy$B%7D-5}kh^WA-7{){k&ryq_|z$nA3Dk5sz&9PjGqg3F=U`KCT__xvdeMLG^6ig z$ZWs4Ai1Uz3}>6LOX}vHZkTS`^X_{0r?LeMc7l4V%~QGXQUb$LwN^BaIhu zzwBy``&*C}LnQAz94&MiGA$F`NtnyWFM-jUih9&;FS+*A_pHA^gI&+L!#`1uM1pet z_l=I(RcZ7mNF>~{3$jbP1RbIV5{ahrL^&4hlHN#$UUUemkw{E)S+_#mh9*ba2S1&U zxM6B^XD#f)>~di2j~~~oqJqJ9WLyGxQR2P5_H!Cf9-b|>I|b0hyY7Kw7-Oa0pA_Tx&F}CF%oy? zc}p(yjae_)Rhc@Zr+v>wve}jxTF9=*k)KnWyeY@K_P7ME4w4aeF@6q?q6i)W21t%3 z{L%y8&{Tj?Z%__yhj_g;?7B2SYI(CZO3$k$Wprn&BKFnM$K*%d`* z`L0n3merd!QHWiUX>Ez&Q{a3Vx1=5w3IN1S)Xib+u_MluBcYgT%t#T^^Fn{UN)cO9!F`T*985_7h!UYa_W5q08ry5XFexLi>@-JmJ0mI-BSuyS&l%(uMMa z{Wu00vn!$$%tcg^c-+2p=h7m(q<7Cnc1?eiKSn*A=rD5H&GA#yj@o0Q#tW=N=?k*q zfd?lW2!(?~P0p!N=Ay{q_KLA;(d>L8ryFDZUAs{r6i*)~X$e7p5DD2ObxWrba!wKC z$irT{gJ%Rn!OnymNlOy?^YAyPor!up5)3iK$j_uB_NN`ZvWxno-fDCbMc}|0nOpYi z%dVH6*~L0alU=Iuh_oT?=U~@#90?ue*#~btA%WR7J+e5o@8Fyqqz;L?8zutMU*eHy zsFWW^ccu{T4>?|@e)Z@^iV!(j$L|JGH;~fZc^)x}z%e0(UDTVLCT1VEa&nb6dONE+ z@6G1IkzEzC)NrO59jEMa6WY)vMKN4EfXvfEwQwW^42UFTJ@-`K-{m&!l1?d_DH!bf zsU0VBWobfhDS%2iI#!k**G6UJ-9`*ih+PxO5wc4?2%@J9GIHC78y#YW2^?0~_^Qn#W=r7$_>d)ch&^y>wBWfVdei&@9hQfa4 z9E|SLf*1Br{*t5=kA;8%uwOFO$K$K75I;SMMOUG<0+j zwCIo{Xe^D%N=87WmidU__7!`Q8M(vKP7Kr>P9ID7IPPa z{s=(n5_VO7$6uKxZ;kM{XCdEBBty_WI83+i2YI8+x@p`4OdRV*IUI%#X)_NXt+SEb zz$|LOK&nR9ou<>X#>zCkHTf=`=75_ig=C2FyXcVJaX~+o7>K_4=-tfeiTwUi_tTU` zel((cD)R=jkZC^fulLqcIjAHc`KKeL^fHQodcrOUx(9EX%p-OCI6=_8$){|YuN?IES8^XDYpq)`bcRc8CVmTm{pfHQid4szrWeDJ2BW-N9+o|gzp z5-D{C*abn!aYELf4b-9m^7uY@qhKt$QFkYD+%tjt?ULXD zg0T=VptAW))1mJt?y?<$TBKf>mxc~>`4$2OctYg(gOjiOyGnnV>9w1z6KYTWaq}Ka z?#aLMBt?r_MC5yx3gd2b*URwpumBx>4MaU0v`h8?Kd1PWx#MYiR0QoEp1P~jGuhe# 
zsD@&}TsM#0DK6Q;?^mUb4zh?2B6*{fn}c1CrJX4GPHNLk{*kQRGSE>Pnhj(wG@G)r z>v5D_7|3QE;1j*6TnG{@F>jRiGkpMu?1GqqgBqBmZYkf%H>GK+hWkUN-Q@JC|CJwL&71>l#6#gY>7ZApNtbP97h}rm0deCzcJAm9mBXPBFhEKtJq~1n zz@cE#!QTb?POt1x2pHg!%1<>q_ClRd`vfT)#emZrXUHt%>>;iYI-a-6l0moEFnufR zL4qljIF~_)Lk2MsXdrXZWljv4UGm*K3_VHOkDN_5X0nwfND(w#19{!9ys6P7V;1?K z^2>Ko6T4M*68SeHPf8IfKl#pOORnCi1ZB#uy+ZBi8KP0UdILEerUtvbX{ZJm4X(&; zD&4BVaJU*gOPq?qXe9KdHOSgLqKm143DHHWi3hVc_=Us(?Tr(^E1Xv`Wp z8HXS}`J#cTY~@l>1m2)yR|62Ut5H{Jypxo7Cc7H5h$6$uWJj@R#BNP-$Vrj5LW~uM z0v+|ZZ~>brhoS*pw(sh)C6C+hO5Ob-bj$34YA71DqfG7kuwp18KkS-IPz(8F*N={S z3Jr2Ga-~o-9EYb8iiV8dQXv!#0RvoP-l+2&fu(vR8j`-_5J{00!6Y(_{;(0o4!PxP{3r)M zuKnJkiNHhXYJeRXE2<%J(X$1a%9P6GaaRjO!?ow|4n!#)oYXBHdoc zqhWh>-~gVZD8++oM}9B_44}}Rb>yDAbM4B7CGJ?WQ#v{vjQ7~VpOn#2Mc`nFIu}s` z4u<0(7m@s7)qwFaOJ)N(2Km+6EL=hmhKz$Kg@eB2KqVwY&|J+#a_wCN8Bz|XpCQ8? z8Mhj!Tx6_Jo{!U6g6vf2jXLHBZ<>9%fF{wOc4`YCxy;MR?1qr&wW1IN61s+6v#C;n zqaKnyPp{qNLn>2#6XBu=Tn-E{1Ps{PQARZeAS9(?SY*4X76a}5jS!`%dy;9nB0prT zTqau|KX7K(Y+z6R{vvNpAda~8@o}n0-eIQ(8jO#S$061k2O~qaa$^*uVX$lQ;G}EL zPtjn^VMBxA3Y1oUS-YnS#b=@rjaXssk9V@kY4URgm+LS^qEAzb?V@**T?34tjz?ry z$sk*DO+tQ=9_CmDkAC2YlL&*3iiZ*eAHDBhF2l)+ZG=om&SpPXvBHLR@Z*F1xyh!H`)8IXRRYH0mmbCZo%?};5V9Mydia<3vfX-l^?XKUcRvqi;o zj?akQKQnaSB{1^Pj#>ny5P%U@ntc!r1x+S;408xb1;`LC=tw9-HbQ;MOY6hXB||dO zHfkX9>Q5s!^(0wVel3p*V88~|i{YyIj28S5VIP-w|nHaA`XRXzn#Y^Luu^_~LbHph zB>F|<>zp(7_Pv7E!;~S?k$I3CopGmN@Sdz*-P-^(>=LIp+~Fz|DVF4u^t3;LWS~!E zm=YsV`ozrrAp9}0Sc^(GWd7$)I#l+sflQejZ_JiKkgP@}5a@CtX}|9S0=cP@rUR!D zrGEVe`QbNF(N!{QxbV3gZBQi!JxC?LQSU`D-u^oLfLU%q7M&P;!5*0*bALi=PGxQ5 zt+EIEIaL_qCa4t)LV_i6VydV%2v!#Bo^Sic!PbK)W$C;rMilAt9>R}5$FR=_-$jTb z1h#Pm>I=r(?GLntN<|;yl6k8#bM=8NCrJ%4)O)(BqjmgP7jZ~-SQK_o2pm~ym=|0j zJ7$9msmpbtj(jW8Y;-}!z?Bpgba!_m z)Q!C=Kxc9WyYXFzZZ!v(e?88SwJgU0LsR;f0BJ@s8XY*^r~;gwQggBMjv#@m+WM$` zfQw`75_|Xfq%^~2a+HIXe&q&}5Splp(1&%Pn3xK0WQy>(!U$KedvMaJEoTl1!774Q zvHrfpEU=xzb&@{lHMBuomm6T%&MXynr~>8|B2ZlWrRN? 
z!E~1v!EGrDNA4>bmhL0n!z4STa|*6vqk1RrOMX0N-0Z1@>IWi*k@>L9N945D(AO5{ zDqN}Fu5={T00d74%S|2BsI9&nt=$>1j%^DPFG+-O$RT=TzIDwC?HwqDX${k4j)3Tc zXdOj5C6eT`wH8a3iQzbx7e^OZwG*@>a}^&Us3@;{D9{gFV&X2sNVP(9Q6=;EwV4*B zkN}+cD5@4}VgMrj=k9o+L&8T~rv)C%f!Tv|W8k?&8tC7Q?IBB^?`-za$WpC`x}EQC z9WRN;qo+GyKTh#S7ufMES$1nrhMb-oy*kQAHg&k)Lld{C7hQQ>2*g6Hb9Bv7!ZPN3 zPR$7Ww%e6m!TxR`;T<}Yyyk!^{N30Zf$zNBEoYK#$?Jy+t@T02-vT1 zTh{>f&Pa2eW|WXKelu^=14ak9FvBp~Ri9>V5kra>_nQ@J)!mxW@FJyA)t>JSWg=Qf zf9~&MwzI4fFz*8gmCg5X!3vRU#`U=T9=-n}#|7>qJ`mFB(!$|a0}{-fHb8mA5Qysi zgndqI4I7eJwjyTwj!AJ)mt7-ctkV@xv9X;~XzLRlsNDG=Yvp(-(=^MC< zPg*C-=7w&QsX-hV~F=eMsQt@(l;z@DlUII5;Hs_jK|Sx!1H(wacQ3M7((!Kgs$ z2e)iY2C^ZI37Lt^ACQA)F5^4y1D#_g0LA zWReIpJ@l&TaqD8NC?{=!DA#g)hFJp3**618s0G}N0%BuQ$i7bA54YFCmy7aGTxLAK zJm_qIF@*SuJ316ct#0|VU1(4nH1!NJ!c32b7A4&EjxAZso3RSGr9f*aYCtq2HEUTt!zPGm48>F4$$2+_uos#bk+j$d&> zRC|zt8B>KP3(Ai9J_7;LYRZE5W30$vtVSs>O4lXpbPTrfEg;nP!HSAC(A9=3&P@=? zv``^fbxmzvKMURXfV7Z?d;1&=a^u*sRcw+Unn9I!kibPLqJv%n80)q>?!gD%U*wz_2raLZV!HuN;JKZy z3ZQd51=gBR)m7v}4N$T4Xbb|Hqz?hd%38T=SG-z{{_#j6g}f_6K~ z3(Vb&-%sQ3ls_tj5{goa0Y5Hp??){L(isvz74B?nRtSW~I8ysXfos0R?+TO(cr@up0Lp0L zMUOBIY^#;XMAaqljW(wo-5*a-P!*n>zb!cWhZ?_#p}d|5#9+Zl6(PL2GqCIxAWA(; zimta3yS7PDkgbE1GPOn_f8AJBY*lL<3^xl2XRu04nc)n~cs{v~uBd80$y;9admhw#g(%q!!2J^^RiDIe*(sZ?WW*BH1#hVYH$dAct0g!ru^OgOih{ zBOv^Bca$E>(gR!Fd_|Ng$f=UA@)6L;LlD3m|LX|3;V-EPsfZ(;HQmR@3)C4};WVd@ z9yo5=DUsLpWfL8=^z;@MQ8*u!xA6jvh0vt~BdqpgI5gI=WLgQbDLO<{nbb=Z3R^KE zVTeWp!0KU9DKC=@P?h;)^`w6=O!kqlyWqcR?ZDI5p#jm3DGrd zj~26y4*{&EiPmgnl>m zm9p8Y78j!p+avy!C}xWevpOD25QMuTlF~KKE#6Ayw#uNf(9>y`W-x{@i7OEw@-J~2 z!r~;a?O=rn@CA`k^@71ghjVL&Mk5o*I@q%F4wWJ!@lem+Q z4YASx2^IC;7$5G#e;O(=&PCWG(GTn{GCJO9{YLjt$Pz;J%kNw7*f7M2wzs8UXtDvz zsija^qSXQ72=1$c*4c*MR~S&;HO~i`JvJDPUNyD)BP$s=-rWf?$`yG6*;o1Tx~f9s z^O0xEtCxexd=m4|#8sw)j=IAI;uTT5w}h}M7j__%?2%;M=z|qaC8S+W$5Lc2#9A*) zev?a~z9|mhPEb_`b(?dm+8r-{W@Vxw;h#Fn^oyl~hvHEZck9FFY10uV+F9$y)tsH@ zWI|tW<z%1~WY$9FEz8fa4m3b%wYPwv!pTxCy&R?x%-ffhxnD=THC%HN;i@0PUIg 
zhYU3r7eOFxd85>b-m0?xw;=qXNK{JaXlwj&^-k#uUtH+})P0S2kkEQy{n=a(aha!m zDR?ARQKW!7jI=(PT&H*xCP^Av7P&cJzEaNJS6J)?vq5y6w;d#G;EZOPz-wO2-z!33 z38*QvwJ;Ido8E}3|1s`SApMyK1qbxIeg7gNFS4w5e1I9RnWK>BgHqV39ojFM{(uA9 zTmAa3@A{7=EOubZL5qo{Iy~5-SoRxU?HDSs@oin}P%K|>J73gZ{B{Dv5vEmBS>O4o zlYoceKf$P0SYgB#vU>g!exW>ygqoYlowj5YycXHKaSkCTzC;ZZT*Yqs-6pTT@9IoN zKry{z(8O^bjUGdsg2;1A2f@dH&8W2E3_w?PR4m;u-7B(-V+7=~p=op+{Sp?#4>^IO zZ>O6UYm-4q0H7${V4m$fVO?aKi?ZOVmE_ldCe&{Xf~7yFK_o)LTzU}VGxd^Jl4!3z zz#y6}nn>bhDx)jzgmm7Z9ifbQ`ho6gSM;-l7&o#GbZhQ`B0OiadpOO}71WB9)rOMe z(o2hSz-u(x<(Z7W#Z$!=-O6>C zWZTdD6oaPzZfpUM*%v2caWYYJoJv7!d7DVzMW`B_7SOUvU3=LC8+njdcoC83TR@uR zGxWF}?v#eLJ|n7}|1#^n+UHr#U8;Ehg)FnBlSvSRf-5S)88~=>hU6Z^fO_TaFd)Hm z%#bRWx}r3r^*O zdxv}?6T-~|hdz6Nq>s6%1RQ7{Jz(EQp-z%jTaLozZlmRGeB*RP( zy%#8qW7thK+=nHE$Z2I~!+0W5*E(pAAIbP5?f>wym{K^=ec`Iho(%FY**SRF_Z5YbkaIDS3Hs-*wy(U$&#S~b23*M$LP{@Ms?yoFU+ z_qW*k<-4iFfv(`G`C9dTN?G+VtD`HNG2i9{()pqbb)rC?w|*$eIG8KM1tlpx)9JCd zIQ^t-F@M5*#22{KA~*Mcna`dt%LXh^{ZJ=oE}`Ne z(4vF0#~)%LQIY%KGzU@Mf{F3O$F0^19@7kgN4;*S>?_6uz8Qu^%NU!HjT>52l{I!k zP^4ax${VM&W;7LUs+vN@7a1r%2^c<`cqWR?xB&?5k5MdV`HQd6;-TtK#_yESgh6LG zH26j!4HJ@G$FGc94EpS*-gZ_bGo<*9kivafDC+0NTPcWH#MQ zw*?vLHJcDO!|J>MtQRU-c2nR+mzdUbPCkT|&kzGUK0aSlB_76UH@5;Qhk&5xuH{f2 zUo*dBhya_uhdo7m6vB?A%AsE);NMwdPynNPUR}*1V`pYS8JEcJg?4?E6Ygzo2bke& zkGYE>NLTJ-B`gg9YE-@U>Mm`i&_P`APRdsMycJ_J!Tue{2i9JbSuMEF)J&|q02 z(&NSPfsGx^$wyF91*QS1P6WEyH315#gKASJ16TY%fZmGtJiCrK6Y4c#ihltTJ1tFv zDz|Y3Mq)|<4w2%Fl!J|!iJnqwA=MMpC71|?{JfpgQm&(uO39fS)f1jL^il0)%%`!mxc2xfV^O6GmJIMUPFC zOndZCwW2{P4@)9hBU{luB_$1g2y3D9cSg+Ck}`hcCo6d8D=+0He|pX<;c5cOw<&{W zAZH(A-tqdk@i4)IBH(_8grbT}vYzo58W&LRH+GsZ{s?*QkWM7vVz$|7M%*^SG#{fZcdR3sTtnjGTU|BS}NXvy;L=Zkc8HlR60BAhEgizXL}}+XcsO?67s1jP+gt&MNIz8ypR+$; zxu1(AwRsXTG{{w_k#wv;8hDQuB+Fj`j8vB(GL04E)yLnpdnOdf%?AK8i1*j0f{MbZ z3UU|3Y5uo|u&D0#uDYK!K}EKK*M?`@}sV` zWJl6;zf*<3myhp~l5GBi~H(po+gI;*&N@IfZ4 zz1BU0=lYE@=XWJH97t|?jmwHUi7loag6Cua8I4$TUK`9z*g}>Q&Y9gLflliZ1Bi%d zY=N!eoDAQf_8=`uUsg7JD~==-tRiMC7|Y=@mQED}KD8gh^Yd9TbDeMU=d~G*b^2nQ 
z*3&lzAAGu-LxWH+Sh6Z1>atz>+P}i{ZlpwhMNYFNjl+$A{n)DFlAR@-8zrttJi-4u zx#NQfS2YU2lC=n|ZNZGPX&8Y+7Z?=i+NHZ`slPm0Cx13T=qzf>4A2%1k=*O^#T*XT z2$G4)HU>t2lp%m5ovgrviv!V(-nkn+KxZa7&r|*`7l3!r7;uanTMqJ@(>9H|R%BQx z?v`2Dtgl(W*~@&(4Zu@B$JBmcMhXIAhWekaY&`zk$-wA0sZ-L#AsukA{Q|#n$9Yk8 zVs32!O}Pe}UB;KTauw zxvAh^T>y-zy|i0*&8KUNMY|2A4`T9W5IkKtY7fiT}3?|Q)&K^6Qy99#t1UORZOJFD`Oh&^!@`epntOW8Lj$|0`bwi~{ocBU; z_^%1JiK>TC1a>Ry{-R|DmR&h^tGUu#QE9(C;!BL5K;YKn*m6<47n)xq5aw_=Y%Z~K zNHA**k(`Ak_R{nDuv?@ET1)n6{8f>f8j3~={jkHu_5gJ9M!_^ew5#G`m6Ee;7!aw& z=vi6-%cqG`#{SZ|5@fxm<$3Hg4i+1C;5q2FUk4>NVmYAEP}{cYM^DZTK9ql6r}cv7 z{y+$F*g5{#cr7C`4dss7rkdmCX}96A={gWB)UKW9Nny{r{Ts78kZUeW@FMF&yuNdV zCx7CT0Ya1Fr3LsD%n{hW5Qp$@UMCu`3>>}ve90Zz+%885Bhv=OQZ@~)))S%gXG!Sz!NO@8=AF*2NGB)G<@+5RrDux$F28I zDj5R~Dck!_f#YFrFI`!QT|?wxLOk>gCHFK^7(n92@|klgb?X2`sGgwfXyaZigJTkp zMrpo|V+!+w7%-nuZGKxMGGP5oB>V7mmq87 zU#i|~Du1=`3qPOd7*|h&=p5!#%$ZIIz2c6x#k^8l-(S4IR{&9S-w{B3WBUze(l2dH z@b4u(yeNa0M+k&e8FMF9C=&%3BbxebB>sy@Ax^rHM(yMPMWL_4$-4G!r%O^c7XbC7K;& zBoh^VDvDQxrB7SeeyKlAFG>ekLG6V+yzXx&dFPh}B-J^M)KEX!@5HSFKC`B)&#~M+ zVQnMyD*cge*M!Ua(Hzn@o7YECn>Hv0cLy;`0LUTChCq#x_Q}}JgO?rM0e7V^S7Br) zkIo90AF!Z~JnnG%QBPG64E|-?SvkAj_Y|Dh=(Cf_CRCNQ!UeS2sRTP2^rUmrBnk{ipQYrL{e9_VM4%cs;ir(G|nJ~<+c}~ zpHpMroZh)bV)Ca;d7qq z6CgORr8Dx^&iuqhh7Ho@Qu!RM}2&87c?bQA zf8ij3-VnI*>Kc>(S1HR?u}})SL6lQYp+1321!lm)g=!Z^S`-!e&Io)MV<1;c+<43o zrrUv>uv8nSm)bA^d7Yt0C+Q#!E=4XN@`3uc`uNBVI4mY{KSSyvOn3Xx?O2fae3NY9w4x&_eS z{BhxHp=*SprhtNMrJ*p`#qvHPxvgcq$vOy|eKQQE_FdnLcMxd2Hc{b#poFxqX06~C zwGqoREtg+1k?wJyLJb|Ck7v zQ6`|lIhrp5qTY}PZAzq6Q=}D&i>@_hjUtT=c}k=F?vXP8(0@~-U^)QT;`L89B0KJ( z(>lW?n%cwk*HET7SdLg(`8i;VFO>DfjL4RR_c0ee`5D}Dq#b2m0z)5V@RxZ`BRbgv zI*%{U8U7kaxhA?UAG4`ix_GmV1bYT$E#hjtPSIsrhfZ$LHP*;^h z)~iL+Dx4w!7Rxgtub?+T+ank(q{*ypPt{-65^eLZ;bcUj{xcViqOwIEoAF{ zF&yb7AmNvdfyESJDvxELgQY zgNI_fXeJ_rzY9Mf>k$4w;q9s}8uapeY69&gGzA;lx#F@%<i>p2 zti)Y)-z+Wj8O3;vOnJch9c>zzTf`{SGuLFRu^!*!iiYYS-ebMBT+nk``i~aSw$n+) zx{yc)t*7cC5bKL)*@5FP!5U3CK2c(9{>8HUdZeRzf?gce(ln97U5yaF*UItVmrvU 
z7cWZ+@ED6gJPjMf2$hV~xGFw{CtLA;s=c@?TKl4+e#uDn zC}BI3;u@-kKirKUMM*T1Bi_y^QZTdNE;CBE#E}TR0AGFR2i`SRj;($S^bUrm&M+i_ z+Gex*2<2u#5FZ=dZ{-dO^;lFTo{Noqd@A+&*-kAnkaKOjqz>GGQ>0Un1``$)?txy_ zA(e`NpzH-nx`-Y%mCl-|`Whqj4ygo{7C_tvp7Lr4@pB}O$;SxKQKi6_4NYIR9ugh@u5RWa%;P{b%IK09e!ZB-=VS(2L*_L1 zwj6gNTD}0*N~7j9AXuY3$6>AtIfKLXJF_9Y!PAHQSQsuiRdAg57~)`0Ud>Js#>gKa zG2Cv1t+$uOdGY!JGoM>T_(>a*uty6uaoEpfmn4oy`H0Re(bS@1g$+-Z04e$^B$eKf z^uEpbU9AEX)12pzS8CS_Q7M^sZuFxXwtG2@eoudJVWH~|C5N8OHtVHYWfeEem7Zge zA#zWG7a7C-INqMNU@=A}4IstH*3l2qGh}T9i>!(65_*F69uE+mEoz=;KBDaarXzpl z>Ad?aK6VC*1jXAaqk*oq(n4Vuc z-foilGpyqe3Ioun`v5u}i@cpiQ7=tW+}yuPwkc8=@Y#Ble9&0?4*9_>2I>tI5r#bl zkOLwT{&MS(*MN;S^p_O22Qx2#z?XhrMwKWQq=-nVBcn7T_g)M{+>?m)WX4k${$pOc z{FwdC0hmKBAuoZOerQ|w$?uYy0@hVt>>AM%QxEy*$ozf9@>;(r45Y#ysnQ}weW7aC zjlv=`lT4WOEa{c7s=(3f)n8VOC2%DCun1NX)xgwYGShZiW^ssE1To}wP8l9bySdcm zHF>9~PR?9V#!x??JGePTi^f8Ey!rVYZ;U`xIzEKS{e-negdR43B*J(GdRUe^No82H zHRjtYJZF5dl=kezd8$T1r#}_|a5V&-=PJXoBvLwmIkttlsKO zg9)%W+DHk`5eD+w&}J#9&Z1W?rJdoB%^nQj z+7(~pzvs0QToEE>8KgmUf(A1i9AuJz6k%%tb)vs%K7^gkcZYge>*sAGcOMYw+Wi)| zqe4x`dI{gtYrCjZ!;{1wgarjBvqiH}2mH8q>LU!fc?WZ?iAD<5JEJNo7`6x_r!ITP z5RPf-uiV98*+gmK+@r)t9dwyx)eRma4{y<5DXPsvS$i+yp_V-x^ih%^KVt)8Vh$7v z&@I6Wk-zJ)y$J&!ratte9H`Bi=Lj%sk~u6Ma$aj?pvzFWjK{tLry3%KW8_)@pu{5! z3ECU>g=b{Ol`>2rvlw=YFR>-2H*L&H9MngTwqzDnaTx+Nb*7X%(M5z)sE-ayI8b&} zN}10pGb9F9t(IDtLodA!z?-KNJpO>U0;`H)t(^XD>b-_$TdiE;8WwTGC>8slePk%G zIk$N1wcnBTy8HAA3|XHh77gf`wn^C$4G5Vs*6rq=R}>z^jEv`m#iSX{W*uEtoOr0; zT_}Z^jHppkj! 
z*1g%}tf;$=7^c{p;}8(aUgE1_wTdw8m*Tkxpx#2n3S;~jlDCs4q^!?UJ0u3-cq`oU zV!(mS+Q-wpcKBsE^dLI|qB~sQU&xmh`@0K!b zfpSARomML3)ty_JK7^tj{N;YOkuU^GwrtSU+_;tx^Q2=YmY96GyAt#Z+z8*ty^Kab z8jp)&lI0`8g@r_=JPp%V6HB&E3n;b#`yq6TNnceC8@hnoQ*wYSOYT78kYm1$sf0eK z9aVx@v6ff551vT!15L>;z)R%E_P(hv%5Z>H#UV#J>LVO{hTk7m0|YaD^> zx^9C9yGxtBiL_^eiy+uq#TIy_xA)SU|7oCOsfc;|$k;;ng6 zs*Tdp*M3nb$|Z%%QppUb{lkc&joFMT2}3ASW~eHw%&${L074YB2@9ee6dSsRNaH;F zlcFQrzMz(`F_@Jm7`xytIT?Cs^|g6Q36W3v!jJa37n(^@JGv6A`BrAc=&9}`9t)S} z+NU%;c<})$q4VLPmd0#q6bXAG9Ni>w#KBog=jCs=WKNCTtXm-0{ixQD!)M4{KxR4yi zF-`(6lqDyexB6g7izm<0p>mx;q{X=vmywe>y3}gu6)(8Ar(0dMo8W&VO(tD2dj5qJ zGr0gL=li)pG`sahHo-ScqN*{+XpQ0Ws#Mub&gSC3T04T>5z+QLN)AO{Ej5Ca#dmcbX%{PaNUdLbzGS zXoww-lH@cH%D&S?zS-%%@=PL9v+ zG$KXx)?*GQwKpw~DlBlSc=VP=@VAcLdqHd!3ynhH+%TgZRhc)u{0NYQiWPU?CD^Fu+=r#5 zHeIyC%p=s@){JTNJ@}o3z38+FbZ+Dc8gNZ6yM?rBUC2u=K-V)eO<7&=!)X;iBEwUCk+J-90>%>EDIl}k9T)y@Nc zDibD&=sEgxCwY*{z+?rXNs`IHf#n1pdn08P-x=t*1 zPoC&RZ>pTw?@NQ;cu#=nXd*-MPr729kNyF=Ie6c?2RXr_#so95=PkPAf?+7)bT5Vt zYFZY$gfqG~q!<;vgj<#8crZzWDBhmIc5w3)-Z&;v;V9cN&e4y7OG+91@G}7a+YmpI z45Gma^BZJgN=Ui*4|N#eb!GNX(;z=AEcoQ{hkWiCWuHux@%uz;^Hf3Ar!Z2Q!q(0> zUJq(4km)N6pb*Q@p`J6I6AlBHDTE}#PJc`&h!L~{tgt+0fdG2to5c6LBa(WJiRQ*( zJJL63F_5#5W~Bq^H7oh z2lYX#KX^~;EH>$IaAJ$W1P*J;p`u7o8)p~?VSE%51Lfawf``o>#5Wx`{7N6Ma{JEd zO;W2b??OOH-El!a^F1+hxoY8bKT{jQF4g?~sZtH4y(2pHht1^}ZMc;9YXT z{h*sgZpG0&{(d_zQHIrK>e|8M73vcVgH9vWoGqOZl=X6?1BNhOkJ$`lk`m9^>jgKvZdUEsyw{R>7U-g1*G z78Tm4eG)lCKtHM|hm|#NA$4m9KAMv0`Z)E3;EW&ZB!0KmTGf<6?l#bgl|0Gf%+1Qu zG>MNX=IgPU1polnNUvsfZK`PD+R~c;VLc0$F|^2D{>5C*2E83xa_Mxk)Oy&z8Df)= z4V6Kk5aIMvfkRh3J@l2VL8*evIX6&jh^!Qz7-9cqB#-VGXQ4Vs!9C*b^_mj0-q>Ev z4c+EOKsXoZgp=u;xa8s zD0_>0515nzAzMS#QQnfMbeMxCGr;3`4~5c{Be?KGAn-)QP3(#K-Zy2R47) z?|)@b?I5g^Yi5TBp?*FSd9~*MIV^Hs+lJf-!2i_ASloG4jltdwcX6JCJzblbxtwBB zl|{YsB}-c2D+%>WZBO0E`ym5X6*pob&ReB#0ce|v6t&JyI99wA^5~z_cPH}{fO#4+ zqcN{%(@!_S6uVkRD*c1W{#ETj!J-Ah3Iv%$~Q-Z%l|=dr(UI%5#jR zkhsMt3pvjUkFry>v=%|)^By5uxB!`3#7gp2L6WpIuZ;H|luTzUd*h|Tfw8xVsrWD)Esd8(r&(;Ci*wghHogk`i* 
z3^{IaMz+IhsJk_R+?7GNY)bol>{k%4m{onC;U0oZ@pQLdFOc~VOVo5$rx|;D@8xwl z_wfZ1L4hMFoxvZQ7)Lt?le~)=lJf6j@C3d%PW7t#5goA73Hv|re;r^24T&M-RDnD`NF<5 z`~f>9KjV^bGCuR4S%yD={ zoyBe_M1Rd83tpq3zJ@(&?b56hQD9+4XR&~}zGXoSA;b*l7kK*p%DR_ak$nO~yv5Wq zgWK35CRavtV+-K`$BdZRRXlV=v*D3(kh1@>@Moj1EZC zMS<^e+4w5z8EBM20Eo@S<4|eQd1=zV)e=Gk@x(Jr6}1~?>4D%MWEl9NPd2f4ZaTUC zqJhI0J+juUsmrFf8_lDnwm3(|a)ZEk zCO9{~_08q$d_0~6002ZlyT5G@;V9ObLKId@0*4tns0xmWe^V9JqV_Mn0PbFgW9Tf! zu57KULva@nO>f%X-5IB9hmbJDGx*{9G2cBuN*V{jmG?Z~QrU@}e z*_clC5$VF*+h;(;zbK++AcOwFvBr@aM??wPJ8#(Db)){+vfN74j8f4h)xx5ujc{7Q zigDRi{e3EmTR1Yt&+(I?MRSU3_L?zRpXYc`Q$!UeH_QT;4eYsuXsP6Orp%79$0jw? z9KqZUBS7M(xb(3yrx1K6l;s%Lqv7Mt(uY)f0Zoz|6P}C5$)3f(V(ceVLY%urlon5$ zdVveNcGijX*r^huLr+z2QyUpM`ICj;Lb)J9>9O$Ry%62t6&A~GEx`y(r>}+>T20uw1Y%0l{6 z5e@{7`OM1Gq5lirABhT}^t?gHu_}2hPxc`^30XI+M_FF-Xe=+aiz|MzLmDcZ2l8*W zS09g;kNXDfh(BhT!qM*e*&$0S`s`9GJrn(K4$gy;uC9az!g+YLlYX8R^gPtaSD95< zeMChDB@V2noVL%Bb7AO;%4L{d2Lh^LuK<(cN!J%Lsm_(e!y56w7@^aRwea z9xPYH8k`t&zFCK=pcwCReSUc63HHw3fYcyMS6t2nz$OPt=Nc$hB}4Fw5#pUgAj^WO zN>d6ap?e(nE+_~)Wr1+K;m)nBt7~U$BCQ7N786!IpL5+Xv5OJ=SB0|JJXV3RI;svwi(e}>^Olvz-+@f=%AnIL3mfk zn=#8TllXJW9V9*Jk(?{>kTVi^r=|x{utgKt61oT>r-ix$j(-P;j;EzYD>#?AB!OeA zs-XF|*Ck{mVm4*f2x?jU2`-E0Fq8P(=}+@Ib#5!k}~$h?g52W zU&ZD9bey&iv_S_?W=O211#RgL%sNNiw=VxnYi4PVZ=^UNN9K4vFX5TKc4KqT1D+t5 zNp=Y-$;tQ~KA~-6!HTY4*WCGeHn0;}2m@W`US&Z{0 z+(qc~ENhlxm5ISU3Jf98<>LhAf3E-P6V@Zp1fn{iefZrzm-XhKEaoiA5Y_F*plx_y^=dy&bS~gp z3gxcE^z8f)V?G0#?|nx=h}i;Os40DQ49%=)7ePlN8pJmqc)u zbJv{Z(~AJ-xe>L8$xNyNK>rlepBjN^LEoS?7*VZ_!lrU%+DW44K=*oumR*YKI~uZ6 zv-O{}_sJfvfYJxNH%~ret^^D;wA~X;anb-i=5-rQ@AFEQP&s!3q&_SSGRcYoUF&VA z>H|dGaA-c_x-JR|a5b@q9u-4S?nL7$Xl2ZL)2bHIH$;@ls*NsruM%c>9x_O-0*amB zU$!KTW&Ed#^>M&(WcGCl1od!nnVO0NjEJ&P6|4S%qa9R;0TWE#x!8Q9_ zFs%~%kqzg43%wxI!6TYDRJhwYGOeUW2zkh-SW`wc_jKpntV;>9hx_BI;3Hw zFF0~4p?p<-cKD#QAEFl_ihLhD!J96F-^3mp+KDeWI<5&k7Ee z@%UwMh;(G&;$dDE>Fe~sG#BH=b?-*D*Y>Ddkd`+DMQf%xm<##AkLCnIej{u$Y7p!Q z$&6_ZdkV2S_82AR>ldPdnIlqM4PWS&ko-x;n5okT zqeTlRuHYPDCR?+%gaU 
zI>7tupNbpUyfcxZ>o>Fq1y$}kGi^kuqDmU$zyoa!f>(p`lp;2?Bd8Mx9E{;DnQ7q$ zV+Rt5FN(7j5VmvtFZ9Kev}KBb$U&RW4&zKbiVh-5YsiJKJqRAOnmulZf4CA0Fgj-l zF;=QZ`rdG4Wp_eb^ne7pxPbcv92%&oxDiQK=jNrATN0=^h->FhxF*Bncx5;0C7EYT z$Mz8+{s52&fICo2sW}gqsDq-mDN$F?*tKulL4OpuriZky^a^5%p=xd}s>q4T@uf@=~J`v(HH;w5jZwE`W% zZ4`gKkTOV07@moBXC6_Dg4RxZQm*YGhb+ieN3`-R1&7EXl(&j<5`WT|@H>LCl6S=r zTNzh3EZVLis)!Ji@YIX(B?;aB4ctvGnJfwY6M|$Eljbe<`sojM_1Zv-)BrfjA!x7y zP{}-pH!j4@6p|C*oUGbzPfRgYp?fbctT~#@nm7{ZZeV5}P5X)#>vz%;hYb*`CGkBV zf($57ZBusWCv~a)vr)!9@Vbp9x9)O8MXKHz1=%PJ{s0sGkH%?;!v481Lf}nj8a6QM zEIP0LfhCroe#H?=&`e>QPM9#oi!i^iNua<-Ryh{GdKZ_5jpb#@0LyT1E=5CEAY=J` z&W!MhzPd2-j@XJ0e7QFb88W)PV&21IT5H;zHa15Lg3JvCcizLgXZ9#B9<`VGgPUfM zZ>dd!M5mp%22JWka)PN_y93M6K|di?h)TjAB{6>&J;z~*u0NG(W@JwHiHLQEYfB-? zi5;-noeEGp)O_h<#Oz?Nx0fTaJ3p<#C^n+ss9tgS2nrc}KNOCaWS{v}b8>0leQ*H+kLr9R7x2I#N zy^Pk>nqEau(9~g+T7lJJ$YbZdq+>ycdrHW$Ad~;|LZS`2+@3RGs?xpj1o?;vYUfUg!5-$`JjT5#GUG6B)m~M&{&nU@q<%{ zW5nf<#7r&7D(gutfnZ}Hn{$F=zg#j+IkN?b*X{A?foza;p^g-tLxJ}!gBKtzZ zX-)>Wc$^$7AxY_T%!*7QV9i&-2gBU4nYd2~BfP3q3IW?y3)HeD7!gdj?ELEdfBHlS zKl^&*#aKR1>R)sv3e3U6uIkJqvrK6sDXDgAC|*9c_0yoLs4Cj}t1i3bm*&#b!&T3ro-f&y_2b@0yIUq$P~ znVo9L&nIm^&tZfpw+ zVn_^ZMkL`$hr7|E{c;Mh)UDJ4?K^d>L^2Y)CIJ01s#tbWqq>E9t?#hRIjEI0InY3y z*x+l6<6SCfErSYXAxwvPbsB}t@!znSMqXaZ%<;Pmsm{J59Q(hTPdPq(R05_1lAn&p zZ~DyHDJ~RDU?H1E<^JrS(%;5`iblrCbW-~A(G1s;8!5NOWp~lnX;RDeoQW3#5&*ZF zMn4=*U(WfEl25Yx7ic$`77*_9S5g`L5fO-y(M)1FoBkASw#rFhu26LyI!V;p{xL%7 zM>S{U?R;CC7wU(X(Hy*96;+c!?f@W)ct7Af z{lwLWvc(qm0zZiv=rX&~&VgimXmu-Q`3`=FB$4t(#t!pMF>1Ppvu%U(fHP_rEC|gK zBOxILSsKOq)E$IqeNXh_mRj>Ci(v8xl=jH~e3YjC8!jlit(#S)2BpY`KMQWn2{f5D zxXAG1N@V8~|21UOlan3Fh{=8oguaM7Ff{0(?mC93y`+2)K4SJ;ED=G_+U3cxLOnNa zI|m#9C%Dpd>$^F0D(ipzTfqfvmaom#k(XvZ$Vh&a`m(NHqh%y#3U0*y4xU`@ucJ&Y#RVHK z7cM8V;N;DEfK96-eiS^G3o(WrV+AcajO~#=m&7R40#+Uo+x(!^e}uXLNLj+b5g6j& zq*x8yEgD_$Td`w@)Kfb6V0Mz{Yqv7(a&Cgv^sttaAylzdQKLXfr#-?AJ@>M>FG;De(m|37VE^57fXm12W z3p#srbT<3=vt~9kHP21Hw>ee;UdTSV>TULX;(U4v^J^y28)(WnK&|y5(AEi1=2FP$ 
zVcQ}sB11MX=Cer^H1B)>P(q@$ot^cxD>k`7^!u8efYnn^jTqu57!$hnbaJt@Y1D(r zUEl-C$#j2F-*322Y%z&t-B zk9lZ%z1I`evb__6q>NgoAAFpu)SB4+PQd9(bkvy+Dw3Jd^A?~f%zVt6;8{><@Q3G0 zD%{Q&G0>={Tqf8?LRwYrv5%z?Xds(k0;6PgXdiWJLaitk!Ubva`Vqs;{L5+g!q7<; zP(%nVYKVaI5byl;CoT!W|?prU;NB1pNf??gGwfaVd2WbK^Mfi7ir`NR2 zgHIu)MC%6tU>==G>Gyf+ZbQmHX(UrohFy07zdEUKq!t-_Tu4|ioq(SbOLW&Pw!|_q zNZXjmv)tk>@<+H0*ae5l#;Gy;p%-EqK8#}WDiua%e^(5Jy;1}C<&A*&>_p_)F z9YU|>m;v6q6@^fL#vv$W^fdc!2hB@)>xhBHz18?k~KK#M{%Uh zBjorvE=XvVlcvHPpLUS@cH;G?GRRC;obO~cxp$6l;$=kno-q2&!)c0}CslIA+vVc!~u2HF*NYlh{=QPf!3} zB9t9dN<7wUnm$B?;AQHYL{x^!id#=0<&uvDI9zz$yvDS zbuJ+q6=-k;)ve*Ze9y54<~H7Ah8NpBY7`LoI4{JLgF;1W_98GMBA6ue`>{dKaQn8> z(wM|fKO|CzC>2y)yhy^=xqhfeT#&@}^Bqo7Y@Ex1bgLg9a#LeZOxpB``{+vlVuEEq z5E+*o>rK6(p05Uu*boPtp?~dhHvT*sAk~`ntF&TT7nv95haLy2NW1b0h&W-@4m&9 ze3Cx|)pVf*n{!SYiI68P?pFf}bICOu_%p1&PU zD1?|S$rB4YgJw_Hw@(Io+v>_>If)2KN%oD+22|`T;w5brGu~}_IfoRXGl1>gaMyB% zC-oCdQ@q4x-a$up<;O6Gt9WOj!um$|ngI2Pch+0nU28aQR!{oaVl!-!owil739;p% zg_%%R9a;MFj4_@dZpyBZ!xD31ZGgfHx_Y{}dsILCMl-=2!abz`aOYrkjlFYOMh~lr z1$`_0^4M46iA1mP$oU*juLyvpWQ*&X*;0d56KsRVfWHXTbi#B0kHM(%i$+k;!H5$? zOxejrBLMzbcA@p2kgIjaS;8Cmq=d(CG2n7qH)=pQUY(kyC^*omU0I*=1x~iPGQXqJ zxZRTJdm+0!zz;Sz)>15A8MKo9?$E*r)%X{Ll2Dfse$6694U_x{3_MON)=UZIa)#z0S!}D z(eNUl#n86=0h+*tdWm6c!w}0{Ww!FUXz317inw1c&nuvrS?TyrW(q+Cb0F?0uvCOh zcFNO(0}?k@WrwW`^@>UbNzr9TLm8ZrA*wMwZ_$r|CkY z$805q(b9uRtY!2OaPWTg8zNuJ`Gx14i<;IEuapTIG%U3l;_;21EZlKqB&i zku!CAfcBV*qmox}0@oc_2dj}yn;7cQjNz-F%7q2VX>02dc_}7Uk9iIoh7fB~$1urp zPNLsJ)$%jrtFko(;Af_zFnx`vjoS#26^=y&x`pPa2x~07INOacBr{I|7GKmB8Nns@ zyyf$KL`!t;ZU^~>fi|)Pi7Gb70n1w|2RLE0hGGp3QwUnq5$tqaTXU`phx1UEge-{2 zjfu(FkRgMC7F?WdFBBQ~Du)#miI%d0GqG>QqAQDk@sqLJ-hj6Xa8jp}HAt!dgG2GB zmWbHvUz)A;hOYn_2lX)<4uUIzKgJRNCI$D=^JBZsrpb%+(FYLGfa1ATyfZS8+-CxL zPz)O=5Lh6_#Jy1KQ`2OhI}yCeA>hlp`WQLAW3;KLRmj}mFCMLdpRATGxp(S`K}0p? 
zgh|_%3o)(0a`0LL1Ih|Ni^=$oNL949p@;0O6wZ6CTG^oYX*{Ahg;9~m&|*EfBVYXs z2MNQurqeUBmDDyPmQI$4?30Y?$^eKNr6r+|KGu9bKBK5Sp4dREn98lVw83zJxI>Xb z`aYUuYG@j!Qu+Cs&9ZW*^HrXYMtN9~L&i9-qNdug!7$RG6Hd@?_`>%6E2n^yg(GHq zi^IP8KlSn)mf3X`6d$r1IN}1G*^<_^gd=1T{4C>>|ED%3gU?Av{NWj??6Adq8U84@ zc_jW&7r~eGIZv*Lga|;ax588sx=<1(QSK4WQRF!HVuUU}><}{@%8Q)~V`0^Fjh8}p z0z0roJ>u)NZ7S_K3hc--F@T|d(ik?N*2eqtTXtV?o`j^>Ag4#-+PYEdWPob@eUxKO zaFP)oI*H;~V7w2sJHM+s2%g&Vp!0_)18y5Tj{_kA5QA=X`OBKgv2i&O3_VKU{9Ql; zAy~oUAe@NKmTG39qp{DG4IewE9Fi?Hz<-%Ly|Jk!lZ$q^dGFsflH&W zN|OR-Y4;IGolviE@L%CNm$1D^;BAr3p2_m0LnW@j{e-gV5(I=&M-@=1Mm(uXiau?|;43ST(HfpyU2F_##iKhj@ z_jP5%;^~~=*rbI*^WDdcaejuzKFf^_Sxfp+7*3+iPWr#fWY~sU>n_l7S5GSXU)VSY zn%WQcLqf$UvC?MskIqvI19xrc3@W`(7HaUg52wDSwnsE%z=9w@VM{yj%TcJLvbO7H z-G6a|rZiHOHVC%NXigAHFw~Cg<+*yN(P4H|a@(?0KWh!hY2zM6LydR4L?) zuxM?gma0qW2~2Q2T9@AHo_Oqq{sm*kUy5|5>)^G_cGeLr34Z5N4)(+GcK2J5Dp^tw znNd(LDgm^MOACt@^M|(cKb?>7gYgIAP#<|O2m{cv693noUB>>U)B9EdDSDgskQzIgQM znf$0ESslmK@tjPO&t0-nrCf|{fX~}>?`@1L4~ol|;ma6r2&O|vAdZUf2--^C49im6%UwPUM=|M}(l zL9*5*sxie#CnL3{MLPrxOf0{lnS#OIj_Y`eUqLu>01$DzV-@$3*A@sFB1?k;)wrL6FU1u3X?1fdmS&xOyZAlt zklaS$33M>nl$@LK)lT&W6x5eK?(_9>_`s%ITq7Dvoh-@f|Kr;*q#JeeYWqJ<;%wrj zS5>K)(Y=)w!M{>46G|}-nM)eqI*G~c@CJ#)XIsuiZ5cQR1c<^e_!n3y8kUn2ckoei)NJhku?emwLyH&7*`ymYMimTv6?)xg{&}*sPQ;_()g9>VB`q?ixGyv0Ls%H0u>&kq@?6!!psX`=*mKy{=u0 zkb%}%lA&b~xF!x(EqF^Y;#hc6yxgMf$|@t+F~WVbOlKLR>EBeMYZ;4zFHBZ;Fm&T4 zk~oC)ncaNQ#=~gPYR|J$vkbKB;Ye8HXp|AKNz%mm(2QMo8#2Qd0%EVmmz178y6{e! 
zyekzaXjKy!AOK}W_&0w!c>uEp)P%*xBqrOFHu z=`UV!32(~8q58Z+O-hSEb1KVM^zqq+rxyt>Rfb~Gt|-}&BQKy4QBZdYcRpnN z8qidHNdAn!wLCc=j}RDw5C9vkv||Ug?U;it_oLn0p@2fV@<4!Z2;n&=_aJz`1G+<; z*IvN{mF_FKOXUp}2mzD&?mtwo;t49UWkw2rf^d!DDghUbJTd`ws@R3E9LOz|59Ie0 zL{4t4X5YzjL(JSHAQZ7vz)N;k>Tk;@bgS9}B=lR>U0-K`5pTXY6cl+SD|2%j@a9Md zHo$N*&@>I$&qc3Fvq>}{Bl!w{-Xr$GF<8%gEo>+)#;w4H6LL+a=fM^Lz^A_c5K;)A z>OAjYoq0vKEWyP1e0y<*^crQcMhjGiZ~0{}4(NXiD9YSXe% z=qppn4lN)_0t?JS;gJ2m-kMQ4kEhxqAkwT5nZjP2tG?i@gqDlcAAm5LpnF>vZ9iRjc2LgqS-P}|5eZo@uzI7vwO)JQ z)3F*_2QCve)2evl6_pN>j&_a8mY997SwjgZ^>GylS*tifBa}*{NEpp+eO1{Q6|x=_ zkH$m@zz}LfrBnenexmX%t8qZB9BG!*Cet;vShl}IPzd+nOV96?DcB4D5F0Dqn7#IW)%JI!y(d~jnGgB**c>w5oJ)miCN|Z$&SQs)Rgr<>FqVKn%o~EYC7Y2pv7CF50%d9 z>#S&(;{q^OK=nR;8I2)Ro&hM*_k@U80|h7ywm_?_9SZt1XGnol9w-8L=YufV=7IUE z?yX~__B8Bo-??Itr%Tm(%WcPTTN&UG5jg(56^@KSQgYg!&bJ$>EKM8aQDdxxbi$x9 zL836$ClvIaIf|&=LN|WZ(7$bu=#V#laHy|U;r-dT!Q**PpTsYWK)3VnmI^+&lX?Ht zav55g<-_NFaD^T||+S9u@#`r(1<^0C*UKpw4Pczd%Tph?Lz>jH0NtoT=VT z|Mh5u=c~=LSQ!)|Knw%8mcS_Zfvzi_RF&vR1N=CMB~oKd%>z#($8JzVbQXW6s;s$u zE!yPXXDS?IhU5@9cEqiDyHFWXp@M)XST~xZjf+U}1m%KCJ1z1!8SbKm( zVDkT&a!YCA_GoqwnFLpws-s@2&Dzd|*DL1L?i!Q5^^_w4;B-z(eug=%iWhPEGUj#g z3^rblts_$>nka>2niAy-TM{y3G@2K{mih}+*X?3za6p&{jzJ?0<9t?Lwkkoeyqgnh zNBxOg%UILXJc5N79Mv0EN~AgHc!^eYF8s>IXpQ2V7Hp2HKvjFJ3W6W3%;SsxSM3z{ zwgqT>^aCTCyMhqNSj5(Gt5jseiu7#f5NDsB8rFD6?!1jyPzjO;@TuW|JhaXd*izrE))l;PA-KRI{1={ic+;*~jlFIp?rd-Hh>m)JBTTrCC z4c$tFc3+2e4%J$k^g$AKSvOEN`Ux9%#Rj-Ij$ma4y5krXE}4I21=e zWVL}FrcUA&`=hk1qFg^)<|Ol;BEi6@i}KbmxXmcCtBgKqAZjv!5H%ON2|#vqfjdvb zvKjV%%`d{DVw76f%a{X0hb!dbmpQW zNN~l~D=C!qZlXY3t_dH(7f2I92%_XeZ15y(c7nK$y0vHbN;Hak5g;+=GR9FBc5UfJ3CYsf#{scd4cV_2?0eMz+li}Cwx4MP-8%Hl?PNvZa!ZOy$%PFI3di919AUN z4=cU8=s%%y$QMQ==fZ|s|HiRu%C;$RQVAgfY~=09A=?1Z0~s3btSn4=6<-`q2rMJL zmR-`mpIuD(YN0liB&duOT^i&B#1Q3O6v$~B^FQ_qAk}6-0@e1`>WtbrI>Id7j%0IG z2gZUVz4Tr=!6v4f|9D+0o^L5Ja1;1_mxXk((x*Q1?=o0T+Yk2btc`Qg2svEfcUoq2 zp_^Xi8ab5*-tXZ2#c`YKk}UyftM8o^7ger`dMDWUtW#U9;bh)dqaTC$%_DUBd_(nl45F3Mq|U|vM5-|`B{WX1_r-9 
zg13E`|DaF=RH`0m)qs$A+rbK~h7(g$whjQ&HB<%`2{U-8iJVfoV-qLCLk*g|vl}Qj zoB0TqLb7j{ogI2_Vh<2(TU{W%XwvO{<1#W*1B+bfiZyXtmckSvggOAbuL#$;naS?2 zxZM<}ME$S8FM+^>ctpx>5!klGtE0)C59R}s43PT#sboj@1PITIV8c+KSNWctyZ@#Y z3Ti7Ia%GCT^rULpPSk~aXXVYr=}H0* zVlmT4;j;szU@*!E1RZElcW-lm!)_-!%w4QR&{32am)YuWWG&qmGHsAPzKFR%9E!jpFjYH2 zRoyK$S^#&mr|B)4xn_Tg7dnohQt7wLxoESTD?P>vA=lKm?6%1jGJRMOsMCImGlvro1n9lf z4eSSkHS_J!LPJc2RO#;87$Ziqku*2fx|rDdYUj%A?I{36KIkff5q6558f^@is@KS_2k+$Cp5B%kzCy3Xxj^Q2&B zrkh_i1#F;CpI zLp6>MY(B0_M7SQbM?M)e_wq$(R40=B@*x0eV6sW^Y_L$u4s1}R-KcCe<{O0=#RvnD zP$T`8)pwtfGFc~Mg^2;>vkfmRpj`xSwU9WDl{I>(6n=}3Hx*vRG{og*sMXg3YoS$>a+&lBJi(#T0bNeuQgisqF@ zyG3r6arX9oFI`Ol|O$tl!aM7VhxbvzS_Umb+Eir}kRy9~Tl z0S{Kp5GBIq3<<>kWp^wK#~Z#xz1LM0+G{{vN$wE1hxZKh18So9&dI1E!U9J?jC}3^ z4Zw%A&p{Q}U!Z~B2P$Z#)vG?lY#L!JC{?W-%6P_PSZZ`k*t0%Y;hXQB5XHS`$pewg zycev;p(m~(nYFZMQJb2jj|HWoUTan7n}W3e2vZvwW6pba>R!uh$xV8(0*| zIx1>JeEbab$urel7RPDHkJxgk_mpE}q&hIH2&l5UBc#c0^8$F%|Dp)WL<-!+5+@d- zac00XV+cMUj|**33-;}y+PpLijIjnsNyhD`WVaF#VFkCh0 z(&*qF6(oHau@2_F{@$VDzWwqAJIFYCOnS{>(%@FY7tiz49k!+!>b{RuzD$zJ5F)zu zF>xl+dnH1J9v&9tE*hPR_xRFZf_#9)Q2~(!1bOfEc%|$S+ zOqR<7Qf(9%|L0#48Y)IXj^UwB)9||Wloh31WHmoOWKG3pAG)jwNM`Pp*!$9Eor;3i zdFWC_l0mUv{!vZBlJq=2Px|o4R|pF*G7}QCDj+(r`*nLgnIEDA9R)Zf3SdGby~wzw zIcJt1=HVe$q4jJ`WjD8DH897sa|E8cq{v_Hj0d|>K3`sk(WDK3xLK$08pJGu{YP-N z9$j69cf;xxH5_<3G#Jt=snWcjv~3qjW^r1PGES@2o1kX|${v55l;y41>A~bSa}QER zr9wgS8{i^rE)swj`m8d@lG?ypJkM#)Hl$;rzK`~-&H4a9pV82P+>kiozI*)JW@R~9 zjgWYabXyCQqnf09kO40<6Q(eshbRlazM;D)z+HBg)`xRbGPmR|Q#~*aDF8j7WMFP?h`Z(lRr^(wn{AL#j zX${T|8XxV4;~L~M5DzlXVhPXSCwjv_e>c#d4C(IL#ee*inkLQ6D6H$Lv$w+t0~^Hx@wyZsVWL zCpv4c*2C8uT3FKG^d~g_&2}O8P6^cUB*Q+?fxg2fQzc1OX>r=P7KcP#Tn+w9%tM!1sNbXH`(v}Q z1KQ!q4$i;j1p|pp#_eIT+uEoEy^Jg#I^N3sE$J<}osL_nbInOUc)Ll-RzpfjQ^927 zor5wTJsbW=?L-hiQYU4^<`g^oD~XZO6%nV7JtCF}#>$21_gU)U#);z%F|7izy}^Iz zHu+dsdX8_>I?KgE;=^hzAnM!~#xYxoz#;)qH0Qb=pd&+KNSavy@{qF*VpNxo{dS!L z!Nj>~_AvA{PDf*hY#~wLe}etR(=#5T*V4>kzhx7SQOw8IdT6LB?t&KJLxl@ku}p1ZVir!_S+C!*8KmvWXz*H=%s 
zm&#S`8j6Na@cziY0#mmV9i)+LPY7+(={w3GJK7W&UNi=nuvx0N4t1$@6*eJ$1FKS~ zr5S+;7|`H+XIfsUaS3F{HX|fU<6m-Sz4~PCK1>uRGUsBg_r`poMr!Au#+j=40eRF| zTNx`Dja*6}sg@Q|#!MyYZvscBMtU6QfTHCUJoaaQ5f&AG0f|8PV{k@9(RzMl)k+9Q zjD|6(6ECa`X&{niP7e^Kk@cXm#VvT`p#YQ6KoCa&ay(M`SHhpVsCEyVv=>MqJD(9= zJT?uWg}zi^V#)vL6o2iW0+2u89Q{TZp8oC%el0`mgg3=ZL#nL@;$D$SF=7R~DWPKn z&I>y`2GAJ6hb-xpxGRP&t}v#4)56LA-iEB7Yv2}50(iF*dIp&zDlNwB7pRfSnBA)z z%W8ab+^MfBEJY*Y>A?$Mhq~|$h#aemU>(qO7qbJPDq1|AOn}McPF=|a{GbR5;e=PvvqHRM5fa)DWM_~aQO?p` z*`mOHhW8OgUVceJw{(g(kBHqycmFcOPGwhGJ&V*+LeS+9aFBr|duGpPml|vr>630_ zx}Xhd?%~&&gQRTnUd+R=pqcpvBgW?_6un&G*^X=?blz$0@`VlIunsaXn-QrGh>K~4QRCVzwJdK-|%PUb>=$+qF zzE_$(c5%!gbfbA7l~keo7I!EOXB)0#VL?}l~Bq}lcViDp*2od$^m z4*^k8cm?jDOG)ZAZ#k_BvMeGnq`;UOJTC^Hh&)UYB+7=kVw^+L^f1swK$tCNat*02 z{oh~&%r7n|nVBQ0q-`hQ8(~60e*dL2Y5s_!hHL~zR=fibeH#y~(Zc9OED@#VQy_1E zV(EnI)5sJQ<8zqx=(P9X_Abg3{zkqVSw6}+cfjgVjkwJjKy%WZQ!SL!oM8Eln-u$Q zj!7;WThOt44H#9iiW%Ba3FVTK-kpi@2gv>WcLIzDlQ+@`HqVspv~qD0W@XZxR5|}v z###yBUryAF((t?1eph3U{mO8Gdv1%ioT>RX#pXcdb@{QRh>^bqqPn|Ii z%xT&iO&v0xA`c*wKr!Yg6czPjG}sdZgjPn>WG!&2%AR*^ux`~@8d0t|iG-G;#7)F4 z5PrR?8|_p+Lc5+PZ4#TwR3bWur>_)~{PX_Hh~H3y-)u4THs^yh3g#~3v8^^sk|QGp zkX^(7EbOzHM43hsA7dL(Lym{nSrJ*fFaW#NPD$CYXo6eqqb{3#vwX~dm{%u$5YTCJ z`r&Gq!Xk16v?Nkkcgw41nb8C2LdZN&X7J(TXW@ko`=AI}5BD5}AZ~wuDW^Asxa9HY;WmWwNmIut*@eZ^gSF>vRwkbGURusU%-pB#5-obb*>c7KQ~2 zs*f&oNApMOV&-6O@$=a~jN_1#^&bQ|R6moc;Th@7F`LoLUwe-eZ^m$8G;d$_XOcgY z;Fz0z@IEBhnBf31f}bNXCB~oFSzTxIVlAnz$QF2X1W^fy zcVCL()#ZKmF#tHrd{HiPAN9+XMSZ2LdRY*rMBXej$W&OTD=S)5{=9 zloHaLU#W(~3%60h84*D}s4Yb(>--gJt%=-*KYbF~(%jHtEStH{!&Dj*p{)8$o=)$E zsBZ|9gHqK3*mdz!CF1lMxg#>#BKSKogN?fO2NPq(Sm{yu66hjDaJMTj z>Q@j_FrI(i;W1$lG25X|eeh9a>aT~B{R<|411263;6TcWmy7Z{ds>k~&!)EHuR_{a z!C7uSVZ_R1J}nJIDnLpa(vV;i5z73@Vrika36>tuD9ftjUgx1k3if#TEKRD-D;2@>AXQE`gytN~aM7U#5z}~G0NJHc!LsiZ38D!6}kX@z*`IbnO*II~u)Cl^u z3_|F!71y|`>3oo-)_fNN|3KCK4m<6yv3xQYl^_ol0q=$(ri{;n&sPy1GJupr~8EL&B!qy`( z{xH~mCg{-WX#=F2S?e}hpR(A6k1<9(f`Cc7YB0l3|4%v%OT zRVhcWPHRJa6?CK!9^k3)j`A#@@}CLPuO7>o7lMP1WG<18bd}VFFCG 
znS;J@&dsNPbCd|K=Xqs=-llgXBNYQp?=_j&uE$G5Ya4*1VR0*${qv0wR@YB}!1L9o ztMD*zm;Qk+$2_ycQvp4%RsA47o60%}4(y3?q!fY5KQxM}BGXBz86YYg6d8n68iwD* zXI8dLCd&^HF;$pZD42zV8mjRrdZ$uP0;*p?Zh$i)CgK)JtQ4`fwJ6HUFP z)#Ti5N-yVxZgN~kplD-f!h_8Kai>>!fw2e6$WYyQZq>x=renrGxNo5b3579hXQe?`&EjdKDL zush?xc?0E*cyPQp+rSZ*|QRFb2zrSQKrBH8I*11;U z_H$NnCx#Y`A6rFILx~(twXmW(zWE^g2F|`>RyH!oXyBn=z8s#uX}OdD&}MwDR|}#z z#Nez?xmG{e+)O6!1;q?sbSny1N8)8+mwhPfJOJ$Zc}F~L7bM+_&zPiXbey?IQ&I!V z!>*-bT}DpZHJ;l4c;+%3)F1v&T+Zv$&cz|?zCbvn$1B+mc@#2Biz^gLjpM$n^(;xS z24Kam4%ERQ9_yL|nrgy$8H=A>-dv!dH^;u%5#3Rqi%&~jh3_H=qb+0wFm%61SJuEv ze00~qL{hh*_0RQD4Ss8o)5f{`A>;lar0$x_iCYcLs~Slm+J(B(ddxLrrE}JrIT;F0 zX-(-_pJZ#IMQPd=0>~5*eM2+$suP|8Cg*xJ7=J|LNR<`~{9L*jPfFIL(F6at{Zd+} zNOvo7tG_0iV$npjtudcq^+KwR|H4p!IKRbmQ}USR19n{Nfuz_7h?5C2LW z`7K57YAMoYv%(xkfzOhTgF}vxXJStUJ!80XEzJcm0wXV5B>1J|y*2!vd7Au=;}Nd< zY>PwCrQ}W}k+ZLXt%7zNyV4+-Y*-Di=mgMHPvC3&dF~|%#MsYxIDg%rNRFK*oU^Z4 z%KiLY`!PO1FWC+>N+xj+iv9&`s>uQ(c|eGQWmr zM^EZlb;rt?pHz6j%CHBRF^I{}N-Qk+CxS&bV9NYAOuH3twHHTV%;#SpWuTY$o68tp zIpmtez@;)uvR={ly^&zM`KNHqvbMs~RvPO!v4gAkTdxS8;jX+?(Tk>W_BXDu24I|b z_1pgIgq&$qq_>b6=--xNHzjR*>)};lZ4i8x?y_Hey~nW))=dPS0HfVeUvdPxFmG_F z(E2A2xPTML><=$4QVnlq-dM-A+94#5ldqbBLi8eFLoRr5G8tiEbi_h9MD-dlCpNSL>%x{yI z!VQ#lgIMS**hBGcT+$F<^u$4MPHhollBi!5>3eXi^13xb4CIv;qVHP7S-nquUrtuG zk{4Ni0^EA(R)gv@m}tHO?E0VdfDgD7s_JvPV zfO>P;m@1&6miSpwEaeJ&P>h`<7({Dp6d0VWZR(DUaJVh#jnQ`G6V$YV;%j=mYPQQ@V9!)JO|ZTKjTxl$q&6(8->flo^4R9e!MNjI$O< z(%^n3oN}cKf<6E=(~$47E8>np21+h=lit=wc1OTgwW!hkiLfFyZ1{6Al8G-(cWA!l+fewhb>rw~+yHzADDE%DkTe4I#}) z79^3B9pqL(Mu?hcS>qH{jjU5n0ot$HG)OEkEtrmgo+5SAw#-6;Cgu+P5^IAW16+ka z(Xh)@g_>OXWFp|Oc&%|-j&k~*XnS%u#FMOv@JYc5h!u00W={W$VsypBQ|LZxdtXDD z?wM;r>c z=G4?=^>#86CA)K#p2Q6j1bu9#cQpyP)f_NMHba1IzkUpsTn8EHpGM)BjqDX*jgZ!~ zpK2g*Yoq$#Jk5Gtm=_LQ$eI9X_FMG?9;#qvL#m&JEaO(j5LgZgF|#ch8av9O;AI0c zFdPZ$+u*c(zn?(KjIBw$SKH3nJAQxQI-M>!$lT)MTsS;#N>Riko;qU4ah9u94~+nl zV`UBjlAWp-p@3dCU2f?1j2WX{CO7Iwc;n010(xkY5p?`NvzcY)baZ{m6Uv^h*UI+! 
zk>QyB)~|HcD@lSx+B21Cb%xdK(w#j=3JrIn0IOjbTb3+2TCrCSD02{?=u}p}VFL+O ztuooA7SVUM@eSo&uHz^>JcDC^6iKTgtaEk73UD8>E5ej+AhJB{I@= z9>a?)RYDmyxLxF&7h2Yb^PI^J3?D(#*TS2rGgp?t$zGY5%ppx!epd`m)wNB@f|Qf& zHQRjIrzSp(U6yoBsSNB8GJ%l|C;w3-`d3Ijk`~<>cwa|MfAq|Lcmp?6XDrPok;!;^ zgjA_MCm5dP^#>-(>&kBh_@Hym{&|In=WbJq{cqJZ8LK6zwEEs=bhSCEykM9wx<;aF|5$rf8iJ4Ng8J!#4B} zIQV#P77+0>FHtx;Drr)1BuNH)FZSh#8%luP%`iob?%dKr8eU%CMhUQijIf%8M@2LM zb0TobI95xaq6oh&uvesd1Ro5jFqD&5v z7wD0@p-X_DpYXdAM<<`+3DZAUtRiL^ef78)isQ;088Y_@N%!y;;lCL?ub=|V-TIi$ zxi{jHbP{T>sKU0TkhQmT@-tG=W$#R}U#6^Y_WVD7|Ix%ijTk1>?TS!}sb{F5r?UeF z9SvfTv3gQtMhc^3G!J{P!9xV$!L_}hIe<|j0hWPhRoaPgfB#6sc(e#5=>0J9<*Ma` z9g@xyKU$zL7R~7QM}@d>3V?j0xFTGv%KH;2Eq=z0*Xb|->tA3i!bqPuQBs2QsFClf z%3Kl)N01_`Zk5>bs2i7NuYDB0Xg8cP;4SkS1Qh9o1_mMQ-MQ*z762^H zyoR7&{S+uhxQ0Z+^HCxvNMvIfd^+|%ar5}yjfb1k)jgV*@$O|w%N>wzMn}}7fvEMZ z4Bc7*xtaYs9Czi5;>9O6_**{mW2VGL;^AThJun8{f=I9~3p$Lr+=lM83bmQrAHa)T zp9U`JN91JGVBqYt3jHA0H1UQEkj*YhYn4eNb;&^(K~}Z?e5$`8SApi`^hmYTJ_?3P zB-%*Nh%&R9VXc<*yA_zWy9CAj_`dDst0^Rb5=N6Eyp?N>PtEXttQUoWF=%?4198@D z)bA`}LPD;tn?AB#|ALZJbr83?Q9hV}=+*T7GmJ!!oH3^d^n5!0fAbLFLEnvS&f&%| zrTLixT&zGv6w|7owY6OUxS{x#)+ocq@c^B-Cprzr{bWIbUe~a2v&~l5XrfInZ z?B*P%n0jz_S4rL^PDFFghPjU98W>C5-B~VOe}Skos_jg1k;^Qz_lGi@i-2~P;WBjo zf;m96nsO}x$1m)pbSU&2?uW#)&#aC zZ5u19R&Lj(e08pE_q`ci&;q&&t~s-%;9+X($sx_~FpH=t9?3<%&L1H7v4R3h6hFLU z_lql@glSSvRo9Z>aj!TCHm%N~fQ}ec>}_NPevY5>@!n*Pt?iGDurU&skUN!=-o(8! 
zrpSKD^k0PYPOyUNkOrkkJ~bxy^!xLU9EhK!Prw&FUjm0oiop;;3@kW6Uq)i88?;r>(8XJYbXaeD48!Cb&3vZ0{l z?I#{N^cc2Vjt>A7&t*y8iy%Ownb7Omm>_5zU5p+f68tof)6UAztxniTDmlZm7tNq3 zud`S+|6`XIo@2a1!WvXx~Wy@sxQXzgG0Hi)LtD=7^ zL^oq{Fne!swgKy9L3z*;Wc#d$+;dGhV7vSOqm*rVsbw1h`xo!orKhN@!|eFFUIkcu6xA|;^y9rNNC=;TC>gk5nc+`b_Oac7 zB2FS-P4OT>0wR@V_C^skYiLO!1&qKI2PW3dfbfJ%D%b)#yv|Xyp?|uB^QpnQiA!;C zk>ApJQ1_TUnIq?)1*80d;#pd12%WCVj^3 zlcCN@*8oLs&s>qLHqLe^xk143|3)9#uCqrEjy;gEH;S+em|S0NN+-_YJeKE@wwIDS zoCJIsV7SQ<JE8ZasIbg-n(_YWF3$rlqF9r+|l%RERKi|MrxLc zR9cb6QWhc30murC+_t&^EaYaYqqa$b6Ng+FcqqpyD6736&BbTP)T0E}dJMI%cR|V>odeiA>d86^D0m+LUca7R^1+}rqX2u)zuixx0gsL zYs|ppd7|-b!kn1_OgOc@6xjq(Wj0fjGHFFuQwHYq;01-tA;~$FT&-j`E|v;}2C%re zlsehct!=ZAV!I>oQfBw#1jp{}(VjZjTR{}r={3ggBFrq3GB7aE0GBZl zsK-V*G3N_Kl|y>ybKnG9=};7>MJnALQVYR6ieopli{9W%37{n*@Qst)0Cd(lP?}^d z8!tv6o^ZNK&?)BQiUM);TNvqwIV#G^@6`|(Hrz@FJ0#wTxif)D!Z4xL8%&+1X>?M_ z-&Dqv7CW?m`|M`~FlTCpGM2^e;@f|Xc!@+3VY7o%$frUcJe)NYL3Bw^s)!1tnKMEf z!QV|l@g=@TyTFyOAzs!hPsv{smgp09(NXUQcp3M}^&~eyeAQ>zkpjr2rZ=a8)7Wy6 zl-322(g9|#^pe?c)`o)No>u|vmvSeUmcmB>XB{XwbZ`LPB9ats6u5rACY}a_y=^#z zU;TXq0KGbg{pS0=6ln$J{QIjsrp^ETKNh02fX)7}8Cm>``ivVR6N;FL5FSXW2 z2k6NR;!u2Wuy|3i`yci-c4=(UFq7r{cixg=?B880Y?8?IvZ(4dN_4njmzpD4c`!N& zuu$uc#DI@88IGly_(=o-5;os$n(Azr=Dsu8Fj2GNi8jpf*KX%fC1<?0uuGqn1r!rglimZ;=c!ZwEbd+6E6Xv9bg+EJI5tA(bhz`_w0HE^10~9 zX}ds$)+h8Xd08?*^AZ}3iUC#xv;CcQ23-?(iHWpuzR80D`chkp!(VeTzTt32j3s^g zXE>g-}KnC|z+@&~SdP+?9F>z!k{+Rq&$Cf8X5NuE_SXU@wSpyyQ5+y#e2=)oup|uTg70Oeg?AF3~b``_adAnxs~k zxZNBpiAB$O2V7I@FL(>xd0|f3gGC`7%+T<0!(9fvMr-jb&gdTEenNf1V-cOiE=`OTy zX5me}F4>9rA@$LL{oU2dNZSirW+m})652RVLhJ<+h(+_;Oi|r5z^|faQOVxL8h|VI zV=AbDkrxV#zYlPRg)QwA;gl%laf~pk7{HDr;X(Acs9v*srBk4xn=(+16=An!6&GC* zER>7k*S^LKk~uF7kC}(>W=$tzuc;;OA&K4WLl}q>qC@@1o3ImZ#$eHO3{YaK{&d6M znp!ylPy?N|yZ~U^Ni!3yyBPw8S0oK?vRR?9`jQ7Z7}Kc{*O?tkrWxZ+nn8la8)&i| z$8{8JZ^qm!OdJfx$07uLqt(|*7mvmBnE?68MfCbqa6>dwNVB-m@UmSgl~7XXO$J*J z8V}O9A@&pbobG27fyGS;L?XnR=G;`;Qdatej>VN)J`<|>cPX#hWD%wOcjwkPJE|0YvyoaQI|c=Oij(}N!--sAUS|gSXNYQF 
zH!8eK=|vcBwc|wiJ*VX>h@mw@SQWiwBB^Zilw67z5;M>(zoGI?2JFt4E{s$@VFGWY zr!|`jaoR~0-OpetmSbXrH8~o_7siyXg$s$*btz<=0z(1OrsdD6s)E2-y3US|atgjC zbId7XUNYF^b3W_^c%?pximf#Er5C?Ws^qou55O0kj? zjQ?|LQOxk-p>orNFc9W7pL0>zu-K@2-u4M-?fn~ zyW+5wZO^7?@G5}w7T2^CPKUdj{b_QOwh4jrs}G73vbr*G0ri|0%)twfsJw0di8>Cb zd4UrrU*3fQ%+X{rU|dt<@>Zw)yn#zam=4A46wiBtsT6< zWaP7b5g*^DcfN3i`Invvx=|B^>Y!#`u)T5?1p?a|7C}%`K*)6Wm1(wd>@ZlTQL=j!Zca)jE1k?)O< z1bb;F#9|t)f@tVQBGCr@jF*xh%S`rAghSWjs`u^1F9;lQB2F4PkG=?S;ItWjnO)zm zLtfkQo(QzX2)bX`B2HC$G)L(YdC+@$fRdc&CRn0t!S|xQasQ0*>xbVA1GQegx$(f! zoLTu|K76YNJhQ_A&;HuR2JZRo)MeUIJZdblIpY9##r8sen3R=S50PtooysGIg}O5V zuoE9aVKv)pzCGL++sJSRk3+8(pdRb31Df_Fn_Zg_Sa4X)vAocsu&Kh0B?sc+>RyK4 zS_mU7wxTLBfP3jQW?cQ1Ppx5m!v%59sZ)A^KvD6NS+Ae8=EPJ9<%zz5OI*Q_1Dj(hiGuYCgD7g;EcM|+do(f`UseicywLda(o|F#DxrgbWB3q@OMWpqI#-q! zmh1(mWXB<=((_gXf)MwUuYo~+lV&RD3DdM_MH?&Rh{x-f)LO8&DTWhm&wl+~h4yHI z&0z@~$h=eYg-pzP1`CAz&9>r=EtP?#y56Gen5^m@uswl`bbmfT!u}}9^6>De1M)}%MjW}# zelP0#-v}1HY9UKY^v8rHqk$yImsNyX z_9leS`pqI$Mmv9JU*#of85O$Wm_09trn*i;!qg*()q(=;vz{1T&m@8ILgLyBNA@}P z&WxA-)S{~&ZVa@T-XWIm1si^js2MinjjL@8_u9pjC-(Z>j}ATMSTdg7u{8?^;dRqe z{Adz@0DCktu^b1M;Gx(yrstk7a73+%?M~h9$Qh{5xhinCLGy87dNUU}G%rUEvudYP zviOw~P>0e!u0UMT`u-X87o@!H5C|b0jXNUR>j^I)8Jx!(;KPBM^INEpvV2D>w{{_U z<_;S~xMhTqL%=3&H|WaPh6`aEIj_uc!uHp4i#c#dDurIUI8w+f&U)=)FWB-mO9>m~ z%gej~vmsxzH^Y>nSBp{UvkG7hg|Rpm@dvXcL!O*M5eFr&eMx+cDQ6;g?av#Pi-A7F z0%>@Qcq<}E)Rpr%OD5ku2?OcFl6!ZR40TJc33H2i8D6UBin7D}e3J08$fPECa5sq_ znVpF~5AtZnQ-e5@z=gS+7TKSOg)u|@bOaO*_QPOT*Z#6T>c&xF0SbS)>usV<_P$3j zndq-YBE2LH-MWOU?`wePl8>1Dy4Ghwiqrifl3aYz zFDYmk5>9lhcSs;8|7{yI-kF-(>jSX%)DgK@s>yNaXpRXX|FJETA3Raw3X|b|G_*0k zI^RPvkTBt!xIh9<5_JMQOCLb2udrP(Fqna~hL-b}i1rB5jeaqg3J(g6= zZ{B$nLISP&Gb&$iW09KDqk($UDpb0xG4UxkJZoG#oubG@vYl_uY*Ac4FfLqD143$& zEDuu!e?)tA?((`_R_|!!2U8$_9#6@idoD>(f%YGuRO39YL&EZ+Sa8Ixk8_{iK*Rb$ z&wZMX9y3qEqN-+sR2lO*YA(#;n~+XE_jHiTjnBbV$~YYAEhJ!qk>Uj`8)>a@;y(&~ zMxzu#>sIu&ha^00L{GV=1U_1*@lG3w({0Wiegw66g#O`_fWzSK|AD3gN8A!R&U1yC 
zQKi&oe|F!oD)sV(i^ME>SCy^!TWYns(13S#l(c=;322mHe#PPKi9^_(@E-rGJtnb(vZ<3S@+dZ{iCNtN<+rAA!=fs#T}x4ET0h2IpdJ zkzxatkz>Gq6+w$EaBu?Aw_vd)go2vX9}KL&v%iZe_B?yAuna!38@^g){$+HD^Ts2e zl?HEJ=Ls&I-yHxH3J1Zs1SE}Dzf9*`0kqIU%?mcF z{|XPbLQ9>|mgkp2auV2b-dV9dIVk1B^uRK|Dxw9NfIhK~T3%Y0O)7MQqXr`hJI}|M1 zJiRI29qHf=J3Vx#;zI{xLJv74gm%Mz>oI^K77mvzrc3n3?mE4iybqapU#rcU%go`h z^E$ezkELc{7}Mi(yk;ftc=l$*v>8}eI|Y17hS_b?ZKtqkRBd@=RjH*b8tw7Ik$GIS zajGV3XXdc5A=PbhGaV_lxZ$fIoQA8=E}N|h<(wt=)MDtBa&$z$+s>5eO~o^3cXd)i4plJ^VYckA3;U!^5uaXcTNV0OwL6I ze4vA;X);W8`%v~VTsuJW_{#wPQEigjBUpKmYK#ja$}Z6pIVRQ%_9m0O(Xt$h{JlzZ z8>CC{U?C;B(S1B{?ovJW`m5kgWNx9NjL>0R<+0nsJ8sxv@ffAE|^CEw)hqutoCca1{!`IYHLwqFQ+WGWL@*j$5 zk}$Kh2E3!&UyRm}!rkdMFk`~&XP0*;+BDsBY97Bpo^y1jdR3dUh&vsrV^`UGjzL$uSNhqJ%Ib2q5Q`{i-E)_$t)k}#}td?nVliL~c$g?l|fF%zw zL2#$NN$RwyeC>jYDs0r_Mp1U6%vZB@6eFJSLVR7!YZtjfzRh#AmN^x%EjUHQ#S}VS zTHt71{_Hh$~iwKTz3-r_h%69#Vw) zRy7V!jBLiC2eUCSlEIXjas0i%qY^1ahyG~$gA0KF#KFuU=r5uk(NzT*@9CiE zY&Z&Pge4!jT)#C;t)$+@=#zW#ARqiIy)9QS+Nqe-gSH+uH@584L^^Kmdl>X+WA9Z` zYQ_A!ZivdSU)tW@Yk9G&L6wtJuC_Rcf|0PNl>icM4%7wVgOs9C9LKWP#?5z-3lT6i z{kmESdbb5axbncq+q~*{bFPVli2+R3i+orRV0C2Dest8%-SK*n)meAJM)F<@&zM2L zBna56J<&N>h>*RiuP{6BO_d)QpckKVlBF;CPYY4+d*(S5b-cU>h#4XQp_!~aq@ea)ZlUu8dqhNu@#zE{gh$1apPUHAw!DR%5lzaUW7D_= zQH1`wA@*}wCy_+lh9+bWh49IMn5fj@BhQpbEp>I@bEFnJ=xXn~bvTrp@UiYyYc+#t zZ*{#ID3-B|0*-*w{MeMK2}nx|I^iTRYE$3aw72!H~sx7`anF(P~&f=pV_@ z91G7n$Ql^>E;!+%dG^v(ln<(?L*7D~@Q`B)_Igx_2o^5R?+0+PhQ`|hN{&axA;z6=`<0O)xr62ur-4lm10!CXb0tix+9Z7@-vI5L-MFbLi+Ab`6o}&OCFX}$KFoifbsjC4zwmh(SP!Js_ zh0P^r08BWRVD;b6G1qE!%vciOU=g^87>BYKyxN);cRed`(AsrlPmxhWBK`3!5KEhm zcmSWKU|1mDIz{7X)fltpcFoPn0g6(>f34*xSG87~2Jz(Q<%KrO3!MV{NQd^{Q`=QVxKDZ~Vad2iU0(fiNcwO=F zYReEREpp=~$zR>^ZZlNE4K5n|+FZmKhDHP|kMsNNL(vI|yjvI|2UsKHjEk1ee+zzx zbRR>jrtdoYQ|@bIOT+n*sLa8NQLa-EXecBLE&qU%kEI|ZCeg(9nb`+nJw|nk@=aEx zV7pT7ayb!#g%+_pU`oLli<5PmrcGrd%e(hcE+>4T^ zNg4~7VITQX2{~k=_~}M-F6dB`lgKn#qo>%)*&nrm0h9|Xn2RTbfN7%vC><+!NUh4U zdc~vqIC$F(Yc^FIKL3-B0SxZWNgj%8TtMB(NjkB=YHY&4Y<}YjM{1yBeeqx 
zZp12(ORJj1^K#77VtuQGe6MDJSS<*}>&Q%v;tE8m=L1;l=^FI^wqB)D3@D;edFaTK z{w~Y3@)B4SK@7*a8pBGb%cwfB1+@rLe*IU~Z5DA{9R4y|1Y)CEI13iv-5wCv*8GMz zrh2Hjj6xw4V?HhFCq96W=rs^*-3TYe@#Pb<;o^N;(MLlmV|y$R-mX7_jd-wiNRz^g8 zbyly7Ez7y%nK9%v6fa)CljKbCphGuZXj?O&rDGw%jDJ}=?u0LW*i=X00u`|AEc#ij ze@VLi>kph1y2%UTOYq*iafy>aN#(5-V8Pn19k-yaX-zxvExM+>@Ovu4y;T^`F8!0A zW?9x-hLcn!D3}v}+d^u9r%IY86t6ihWCvw=UC3}=DHM{RZTMAJl}kLjLb3{J6@3#` zs4J2!5T*V@9ql_z>ACMoxxk7~s|1-%rJ=ho0eL}ML0{&7&}D~7CkZae({F|RjI&7F zaR(~Tdp#Lt@bRwFfm2;)cFkYF@KMHal0KPR#Tbb*6Am;oO00*E+C&VD zej$<#R5Ujbg)5S3_bau9b2jl+-8NX1Fp9Qx#&z1xZu&BX9p6(+@x`ll76}nVG#LpJ z%K^lW&nlRT8J*zP1`m0$%rWM08?z29<~Gx2w%-A`BJsuPJWsU{ROFUf(NO~59-{pX zi8N-V=&;0qcq|*-tq8kYTVYozEP3~N$hlz%%+T=Ca%|Kr9&`mupxMtLT{>c&7DDRk z*>3aG**k1g!d`1oz>0J1Dx6D!oA0rUjPInZ;;9a3Mj(`zBBfC73y=~PKxwR`$=$~e1-&pb2JrJ7G;U5n zH4*svbFsczV$-hN-{7V~B`Q@iH z#jgJ}I#lCXuZ`gYtluAU;EzKu+67$T0epAL-%Z=98o`ZDR;WXQpY?CtjdT}#v!aM# zX%PMjwsZK4n*Q#pRmd1vlv8O&o`rvdJwzzjs*}mJO%B9nv54EHVEBNPFoy~@Peq&d zk$j)4^A$!`bGwujXx@yw=zo+q*PmDD1nW9>cg0d7Ijj0_E_>o0^DX=Ctm=Z zzBCJNK?;{$A^R2Sm->{;y035SHe9wzLU0Eyh7oKR7Qjy z7G36XV672-v`Y&a4$5=Ax{Q`W*w8~1u~9`ZUYg9UR z8cC8S4tWQ@5nAE=r7Dfiwt=E%JIyBb10g<%qE77Ir?5yHyePbVz({V8iX5D}sw1(J z4uc_r&dwToH~=wK4`O&E#WM?Laj8H^sm9k>>a{w;y*YQBmWa)*jiS9}uP_=Iqaw5Z ze%V78B0l5KQ}aEAuMfck4m5MajGQf2k;DPH zb%ZMQ=4%Y(d6{DeKjY4!#m;*m62-E1j*>;BR;ucO^H1X%%_SgAjg(lMQsSzEwDNL7 z@meuD_%z=?LFRS7I0sLRkr;CB*a7g)|m-)P|{-Sdnv+Z#wR#&B0>| z#w*<`=lUgbJ$j}y`K*$4@~6Q?rN z#)hnFH3GJ#vOg38a+?W~5$r0C;(heLAQ&};*?PJ%=6e!x3GWO!Iv=4j>Pvvl*_>|h>3nxIXzM}pG9z)Kin8e zTKp*)118^XsPl$1H$btenc_4-4P$cPuQ?$-?(+T3y(NZ6id%uvrGsqTCWT4-3eQQZ zcftr!VPg|3iR}TSfru8cEz0QvGhO&PqCAkgYp*!ArBHn4IK6N|*AG~>7*qaOS?zn_ z*&nPA1-lg@15@*8M{L@E5U|Fpg(x^8)^75I4X7v?WK?Xbkj^47C+y69a>&tTYLMbi zKbSDOpgaj_>{0a2Og2&F@kSE8*@vSa(YE!0TY5G#cI?81F<7c12o)$^&O3E;44&Js z&8~uc-oM6z16yO4jUo~o502t`HSF%0uSol6V%{@bVaDDXTw1Qgi?%6dIQG0$oK&3-Ffgoq!5QP;~WwU?m;$VfcO|7Iyc3>!ugL7r> zebBk#(?%YXOS~PAGbRd11Ob`s!oL~!YYgd1!c6@Ox!-U&?1W5;1DSB6RQc+PU`@wX 
zhVl|#nhC;AW!!oJ6WJ+aqL8Mif!MqqTQ|dH-3vN59yi;D!(&x2F`Mz0*$!WSq_GcO zfGAggGG^RxL@I>xpxjAG*IlbOJuwV9;~V8%nTMEM8iojl1Y@BH``DENr;G@K#b;y} z{#KVUCJGEe&^Y)mjRF`Wq*T*l>p$q;NKgOB-r{e}Vq_Rd)1%{59sJ1EeuOeVr3p(E zc91YS+e#Or^aD2)f_GJ;2?4f3ZUow|x2<;qFaF$~pp7Rc2GG6@MrmSD7SPUkBX9oi>ZP+x*Jb-)|*2s9dw3No=RA$Ukzho(N%Y`i5V@ba1A zo$6mKo*thqLLd%jPY+F98tGlOm?$W$AVOe0wzvT4Y!wahqflMUP|&E4DXSrfN@7r% z-&Vmx5D~N+H8DY;7QagQd0#C)0tPi_e4BiVyfI}89-q=1C@2%2pNY&zv#fhOTss+OhH=>%b+Uj~=TWBv96Gc@iYH1Lm z?5X$F6f!J&ekJx$cTAHv^?40pe&ttYIG z6f{)Of;atDq(Q+9XU*0Or7%(WY&a$?G2tyu3dKbK4!BWXLhbyoY)%LMV{RU%v{n_ha{M7qPh0;S5B5xDI3z&N89}g3* z7?eeXQ@NRmq3HUseeS>0LW2n9YS~x?LJEuW@X$8EXRem?$L7@G$A%32SjxBL`1dy(C*5pa(%DSI9%7s{&0J0jV|^??{T?XPlM_GZ>J` zWrX+}?J+_~LwW3y@lx^4m7-pH1R!376L?`z+(S>*q9aoCM^svIaILqZ^gj&(ryDo9 zK5CGv=6^D#8hln#g?I_U!YH_lgzR=0k&UoJIiI?`hKTS(KXg^_lm}z4eQ+$qNK6T$ zhc;fI-F`t?5WpD&G9Bna zQtvdWVu|_+H;!r*VN5K;V|}A$<3x}3+aaAA08Pn5!k5>4y%l( zhY9D;)?r|2ed;j2pB z$$>P)QcVf`U0gi@3cy&mrmc{!F621)@wR5Jkh3*E+PhZ&*wX{vjeX#zYCC29FX7;w z6+Xh{Gv!7EI#oeLqF$77l-vrlbb>=X_x3u}i~& z4?KVCqv4tI0gp}u1BcOT{I){y9#e{*DpPHZtb@{^BB{|xm8w)qNaFrOKBY_)(}Nq3 ziHY8kGEq1nkAEMg-YIfG1!YlP=jys_yHJg)Dm=JhB1kT( z`*LvAxY=pkwt=zL=l;szGRj1;zjFLzNP_yQTJ)p~;4#WXF;%Ko(E~Phd2q8IySykJ zNHdX}s#Kec86;abi!xDE^>3B1uPE6H@l@6oKl%}H3grUEgbN6yVd~Bb4^ttJfKnN<(+T|K^VnJ$?p|O&Zk;3xpL{Bv z8yPY&(Gp>dpT{m0YGACy6*m+0sm`99n$HafW@s9`8=U(Q$NGF!y>X3PIywDU%bjv{ zJ?03yuXNRogtvXI8;(e6G*!XROA(nUwpPdw&L)X(5TQWYHr%?LWRPz}1Z!nm$H8A0 z=VGh(BvnRo*`#IE*rWtON2=EmiTK4|lN8Q)p|)ffXq2Eq+=|s{)Fr+64CgZuJ1XLTWEGP+OA@#O6V!RsxYps?*;fz4xgoAy60|WWLQ4ZJ> zZnj?NN+T>Hnn~FIED{iOg3$aAp!wI1O%j|WV*`>zzhskyg98o_w!IL&Sm-24K+sRF z3M2{1LB^WV{Oh$A+mbc#kZNNJ=LM2Ikp95HzEgF+Noao8Tp_QO8bz3Sh`|bypK^qO z#(+*hSR5oQKmiC2J}>~e4;>&>1)8QV5)kt1Tpe5bXnL~3!hn*KJwYUfVnC842^=U& zVgSW}q7}&afI))-**QSS|ELWM1BwC-iWK?#`Lh9v^UXA%pOJn`uN7D%1PD^|Q*(uU zR3!zjttIu;SaevM!t00|7f0XSIb#R0QmjGY7$4wM59lmiZ=0~{bM5)gh$ zfR(0&qa4s!YyblcAOXSoCP|XT-u#d1M7WV^ew{1i$u*)GU1=proP?oWK?ES#MF3e( 
z0sw#k53UH|TC0LAelHjSjErftO#4+cQ2&l~khD?_B6MyrH8#Ic%a$M(jAmgfUg&$gBK{1q{va;YR=LKU1Dy_zLq4tEU zC|A52EOmvbcX0@Lk}lh7hDVZgLD3D)B(0{$F7SmHqBQ`881&={B4G2$W))bGj6YKf(JMM%-*`Iw7|pwU;DlyO%w|X-%7pY zbmIjLigGN*5Zp9XLYgRWgLSkOJUSNA)q~I@a9u-d*38sUjk_#p#tRyJ^?s_WWjgl( z(v~KQ=~=2{;ecZmsWLb>_5ej0Bm}PBsCs)evQQ*zXVtE_LZ&KNMOTL6y(Jt)*FJuk zd^%cH>MMwn9x~NZZ2(t5sJ{>a?T8~fnkeqYOm!u@GXndN&W)xzkA3J#YXyIfMz(dZ z4GG$i>0;?^18^QnURaZ*68B1Kpfs7^! zOHI`?nkbqG1;|A63Z6UZQ)ga<=D`n9QQBWhjH_R5T)L%QX|1unBaY`qSpcVz6w5b&WdBr82zf2w{ z>VBwx!n-z=Kb+thIJ)(tDSW+6)GbxU!S!h%)#A)V(fp#3Qt8C#i>HMgY#qjbj%K2e zXgE!TbHn4TSl4H&ki$9arRuc~k;>*_yN2T=O+sY)<8b&!=pXx|A|H zkFv$b=3CF7Y;wAjZBB2p$@f}|(wA&8I$d!!pKreDGPc+}#^#VVNBu=6TU>26Z?U!Z zBy`hPY|g1$&P|_hQMg=E#~y8QK7X(|=?=E$yur4hGuT@61sh~e=Zns{jI>FQuRZAS zwdo*(GPcF0GZ`23^%|T`U2DzfoU*xe(s^`^E{Cqo=GbD*9hGl23gL3*7K97F1${Od zoy*o@^f+mB7~%BS7IoL!bl#ey%~@-1`fAXv=5)3;pB)}TAa z=<~)Jd(K$%#oCKbSDO?1T&+cyt2vLWx#{q^Mg6Vzpu4u(a<<-dzE*_MCv_>EN6Of9 zNFk;EC}YbVb#F0mR4!+9NjIBo&FN%s`h@U3mvc@wp}*-Mb2nL{aMQ8oZN}zoGGCJ| z>Qp}Jd`i})OUdT+D49d)n)D}`bnfP4&zodYLMC1FB^jhnBXiMbWQn3h2c^r%Sc~g9 zjBGN0k+nD}ozq)nQ~IKiP3RLc_*_Efphp;4qqI?f5K4EDJ?IUxL;)Y8)ER_qE~jr% zpO3BS^0nnWzPFmg$Efr7*j(-&lg`^>kG)8pqfQ}=)Td*Kg2^?Ve2hAej!}mWuDD!; zZ+dgANoS5tUrsisFGlIpEf`%k#%6Tb=4$>LTl2N$sJG=9b=H`qPTix+r;CnGkBmWw zjLGSbF~{7o`MfdqoViJvF9w&>6)E+}7Ms!Ins7Q?Y(9UBB?>8u`n#h|0EvFKB_)A@@o!nNS@786CB(mjuJQI~U0M`>(wQRmUI=Uz5-s zjmsNL6i(9ToG#jYu|zTDq|YgH{@!xwm`A!C&gO4EchC7^%vqwCi^}H#!f~jy+N@U1 z2tWXUAOHgp00000ARsUx7!{C8Me?!%6aWB+vQ*Z7U6jfU002Y~0|0=8gaiPA0yhgx zH`cFr-8lGip{H@)(AA)KuzWpwr%~FQS5SI@d+e?0rGwrLzW;fmV?s)fPXF>-?s}>T z>2;k@Vq9`W`0K%;M;CJE>HX08Po~86_4F4LOWxkCJ|J6^_-9biC?*HJzv9nDzls0< zd;Q^Yo*6z*?-XZx69RR5rqyRa$#=B>KUDPG<&NKm+r-q(6nSznyWxPoao>LInvh-~esp>(b?JIV@BiMq zk>1$&p=_QGKiJzh`hQ(Cwsc7?%Bb?I0UctwZD{FTm*;{X=pemJ*4LGdW3;ich4kK( zA9Z8o4V~}a26V*=)%0>+xtG$WNB6G})1{FsrrUo)hF%@k>A=|JTr~RW(d$*B-jxEP zX+mI*+i&J1RZrLSFXnnpk6ujpG z{~YI{N%Gu|duWtX@r%j0@BjT3T73oAmsj6WSCSvS(iqS^#VK^+6MzmxaX57JobkE# 
zgtrMV?&tP)4M0cg9y+tm{LPeTwk~e}9@$wyJ1v6{qTYvc^ywYnH|4)X(nAlr_q^ze z#lWZA8@@^JdN*?4-mG&V{pLomfgb%{WNzuikMqu@m;13b__?N$jlNv({3W;a372T} zRjQ>Oo6yj|Wpbq<$ksoo^Dpl0x#WtDbE5A8H%G_+VVWy?@G3fb53+QKHK%h&-#F2m zs6nMOcA3f@JsRHny_fiO(;GXvm7dq4Pj7GC^&uMg^@b<7u5#hMK@IgpSvpsiIB@wV z%v&#Z-jiFX{{M~1$yLAaN>?|M-qES(!IBXiaNV;LQM>q=MSLkd^_m11g!fgW39SaDD_GtLrUNi_D2{MviFXwi%>(LXtSf>lU zgci`*t7)M%aq7-Z-$04(D`6MAbsk#%g6O&6HrD7(B!|)&BVgx>KHx+TZ9Pi|I>?hN ziaCwW#PM*tQs`s<>wlH_Lc0gN=Ldim`ri*}Y6i^wJWvvUfIZq)s?$DKrdoF z05Ji;#n{fJRIslWVrFgkW=1(F#ts^`CrND8xdgEfWQ^_6anFF2_d+sorBNDvj4}}7 zNfPceb(`Y^KYy?XvP~Z;mQzU`ZR`l zr`NnQTO#MB_g(5tpKf_wP90Yra=eMMuz0xOb-$(8Z3jyU&|FW49_4xoi1LQ+qzYiD z{RJf8WKIy>ODU9}eP_L#;4(sm-UWtzJ2VoI#6AvaBO>r+>ufeP4nC)Dy|iIkMiUuD zmc?lATw}AZQz|>2Ma=lHFMjreY(fS?nD&2Kzw`n>phGV^W1GKL2LhH50A}Loaos!o zR=SsT6*U|c*Q zfCt_G023E}n_$j?l~Ai-?+zOBccb#zH(&+AN0%r`qNMIK$R(Qa!?-^+b8L+_j`@Mp zbF|XVVd%vzi*O(pIEj#y*w3N55r`=>i~J4QH(AaNeo}b^Omv9v0zq8S;h9~b*2+oG z011oUg%RG@33A55KRg})Jc$I;Ix`RM2J>nYF7tTLdDuvdc;aM8+Ij4<;e~F(C}6O5 zX)!QYPv5qq0b}*1SqI%hNeg{N9)+^?`jF5D53PSj&?I32{pj{aq9vW4W5OcGy1%e< zVB4ML{$GDOzU3UEStOHBMkx-`zR+7>5{>#0qChNf6Lx7%K>j6pAtECNOiQ4PTW%B3NH#m$&gAj$~`;duh_Ok+yj+!WH3K6qj=RY#S_9>&kp!v)peGH`%hCL_-bv{JIh*_gu7>Vo)l!CDNa;#y_UXVo!$M z?s;&T7OEBM{aGbtke)G&QuurzLp%rnQVRxb%Zc;5kp%n<=U5lAB;we^R_0ABTQdt- ziIE`+6pez3N&&#y|Nbg$Zc!3{jrYHsaJmUr85MXAP0%+XqJlYUE{vK0WjK}5=bLkF z#eS-+9or=fuALoPVDk5&X+D0tHYLtygAw{qkjsOqn5y^x6;!Zg$Nv5tQ=`UmJR@5^ z+=TXP@X4CzE-}QY`l9^{pfqSMXC>@y3>zCWEW>76`8E$3C{}PP&wO&e=2(GOJY7=J zrZUbo*rc<~EQ>rQNLbC%7ozinbZ<X9I!gxGrE06+#^9)q-M!4xWFa1XaVxgXfAXBUv z>FB!aW!5u94eqOZ|BO@#Yg({tX&m1AVFeR1b+`7f%AMejTYQegBJ*FJ0V@;m-ouK3d17@vZu{$T}!P{|dvz0Ajm8rQ#vBrF#$;kbteYD$L{La6h~t}EpO`CS0F#jb zdz8n_?!tUqU<;$RG4W9puVWp^24NCuU)mcVw})Mbkb@U%^Brd^?Xp1OEU_uZqpXyM zQnJgz6nVN^-lPZt>~uzNmHJqX8?o3E0N%Qp-b5a`7YBO*FY&0|8+i}ba3(Q*xwZAF|}HiD~6_**AhO#$cw%FWYUWIkyM zgV~Y{x}MkC;02G3EamrBMEDx(!MbghOYUZL7kBqDEjTIo z3o`VxjzM_#vMf&2n}63hr|2MAyHw37l|#$`eLY3vGNBL^Dkd(IbZ8XcP3Xh0FYoth 
z?dZxjdKeHbQP?u{m$!e!ESMN;>kX8r5xj(CoH*jtN=TEVVG_SffaySj-z5T72c)#3 z_yWUdX-0Ukx;vzci^a_Q;=kI8i2+`AmUXJ~5awHvp{Q~LE@$iy{DrVb-|*ZMbFiB6 zBA9rueu0N%>0U9yITX#2+zQPy>vDI4hB9VGmPIlF3nN*t6fzE>0Mijs6x7AG1pN%s z+?)qZnHd$|^R^pa7}iN36n97Qr+>WCFTgq6Ye3p44A{ZN=xn*X)y`}f4t zPH|mIOklX8re&4!6c{Z4t7DF})2%0gFOkHtC9ADw4&*a~s~$r8anmKBvnqN4s-7Fx zr|ac@H*=?9@EZ`s?s6}Wu-)Cc4aOJv4h5e;p%f4_muK* z(F93Z7)u47lS1-5osBv$a^0E7g$W6=TOs~D53FaIBq!w>>8s#bUnM% z_z>Rd{A1yV6iU$06(_PWa@nS!uG5}M`$@}*(@KoIsjneMk<>YS=Zk}v4WF8@RO{;! znL`?$a9#2z3rbdT{RHty&n^=AM2W6AnBjRP1BGg;zQ zD?Xk}!-u|Hii4n(lzh;^s`~pzHeJ2q+6f3rYykJa&Yq80JQz}OaaJzJ0DY)MKea9& zgw2BcCFu=1t+=LZgYNHQ#&`>!>V`p?hNuK$vc}W0fXFchJe~2+VSReT@a6c4Ts}3} zU`>4iXRiTi&cm@yO{MyVa=2+{KE~zKcL=5{R~Hl)N@A2OfWtMzgLX|K8d)4=p`Oh3 zr1~)ec2irnhkMx6_}(Drt*KjqP%rN1LP80ogvLFZx7E40mJRgaWBVCOa%p{r@u(W5 z2uMZr`)B1kdnmvYZLD(3-SID`5rUIVSOXXQV8;MIxWfRL0vS-EvWOA@30GbKY`hM0 zTr^=S&iPpB68TlD$H>GIB@M9eD&vH7K}HMkn+;iG_-nvNI0}!x5ns|N^+M+9eN-4; zpdYoBT&~aLludmix5KdgC;aRgm@RdzPx#~+F{1hdXclmL#C~XGyy^WW5d5L>O1Ltk zSZ|d+%f`O;7!S>>{rn!FA-8nM%~flPV`z`&JBcf4Upl-b%X1BD#0i>k;xD{Oz^TSK z;86@8%1w}>{!2_ws0Pr9W*trf4+s(lUau^;iab8Fo+z_bSQh(nXC@F2E5chR_=z`-5t zYepKf)!aA<`W`d`X-rdCrAq=Msb)s6! zniG^;2ktlz+<=;FmH>->l`!f(va;t11TjrBmWW??ERo+IntOPXd4e zW9TC&ROQ_7_+(0_)-b`MY+Z{-(j?y++p`89I*c$PYliJkh?fO8uUaDbwgp)vc+@kK zmW~X@LhVr4G4Z}XE7}j(gzX3%bI2!*TT=%HBq1y`X7$8saFyFVFO{p@pMFMC@2DDC zotxO*O$L5*%eK39r$0Uu){&GP(D_pa$mnXR()raX$McN6Un3(OWn+Zo{y|)aCcjm9auTUvhi6ePQAB78g^O)_Kh;jhPJ2E9OZU{HY zI*Bs;{TD8L?~rP#p3nghbdQ1oa!@(w7KK={-#KaFc*LrGU9w|eB$`&P!_1j)e$qmQj=y(2*m|op=+{%%gNOshG&`|;(SnU;5 zhb^D$M$clUu!-KRp=jDJv(W=z;^No*36B8qK(1S0DZvBEFX90uFS06tFyu|9=S>#x z-o=;Xcu-)0y4(}e5`R*XV1R>45Zx0}06T-%a*4NjA5<8^OGxSa7*?=v*FF|>T6xcP z#-_S>Ne*mVPzNE*y?Z{diQom*K=3yigsJ`2{wRnj1Ts3p1?;U#<8K$?#KXyjj*nr! 
zR|02{eyGxNmpuw0A=c>yy3+S$?1^P z!*gJ(cp5zX<(M(VKo309tP^2ql_1C#A05uo9ijOdkygl3jiz~rlBglkw?2 zGU|0^0^$KmPm`b<^`C|X z1kvZy<+lwCIrq8`=10P?j%(oV3QsO-0l2t!ct!<0CB2QTRGWw12$9#omz`qr<7QmG zy(e&?WtMpqij)UK<+mb_SusN7u>#$nMg+iuudH0DLL1WP&A}d@yWKyRY#dyaqJnl0 ze~i;J!qA*UR#-RuUh=0SdRasR?Bm^74emILu$;Q!~aMn>EDP!S!7H5__1KD^3I9+}RiT9Er>u+VU~QPY8!v!x~!R@?zf%clO}@#-cP z8l-hh0igf;-ncf^I?wnd1@5vswBWxVQ{H(OaSn3k*QZ0AY;IOR*Fq(u23XoWBes+I zV3b1CMB$nX`$ZzaSJ#XvMjx^*GWcbtbw!Ckx}F9zZvaE(Bmox6!M0&S$gI0UP+JpA zJ$dM%6TgoJDetV?+KUUV1#Hp$AUgJ-raQHv*);*yJYZFuVc0{gddAD_N@RK}EhM2n*UjXh zSh73fYexi`7U{WfzlTvQ1gP6grWa<30Y*|lp`dmFkK5nMlrGh&0%HJ>PPgVOSRM2b z#{cOJ2w~P{0E{yBPM)o4!&m&<^2w*WLNgAXO{0i09~c)QhqC^u^A*jCQ+~wm`^D86 zDeCxAw_vS0x6S_uF@9SMkB+$yCc`3TS2{$oMRgLkPj$OGr4uPdawnw7wF0rhlH*?w zJ7m457X0Z*PRDR-H#hO(0WA@L8SO*z1m^Ku(C!jv;7^JYzR=24QhSa#A$?>~Nosgs zdLI_Jd=Gk$Ue~_H_T|K4-l(Y+ftL(})l6fTtzx$wW;~M+gN8QEbA}r1>UHh7+}8#M{w>oMISq zjslH}o&BCta?xxsha{{_+E>cw!-B2nKK{U7Q|WDJffj)c5Lp6iFk$vh9-M$p!Tfh| zEc$oZ35shg+=#pk;ZIQ5CTIx4GHf3-iOm-nDJ!(nP1F`i?h zQCpFi{vtvKNKBI2Np-fO4|_FDjNv!5+vw-6(@Jl{C_Eond*HgT)X_6k(kz|Z^jpw3 zs6D`0Tdp?v=)5+sRsF-k#%$dU(8mYZXTzTPwLK7WC(trH>5TzFNT%fb-l);~XTmu0 zT?7jCr`|?kbD7!SS8jXtlXCVEjNNX)`Ryz7RaPt^JSd3~X;zur0;OEtg!h#OG~+lN zuB5bMUl)V1$bi(#FTAZGk%qUjlRhg!z83f)0*#n5P^8a|8J)k=U@TvaqO;!mOuVHE zX(hUW&TX||mp1ZPbE0*xY*OI>PF1CGkUf2tknvk|v^g_Dy*^>WSu_BqA5G!mPnt*z zXJdbWXOzkc50f>9tqvgVOE&l{AF9#lPCg)t%hRQ`mO82T0re_2D6$5|r2|E(khWC4 z44X%@GnP3eG)=7R#YLE?Oaev8kudi`EtVuaV8RDgEC#;xJ3G~u0uKuid6TprykMfy zZ>Np_4X0gPOfkn=Ie3<&=e>rxX5Nf7skyq+AUC`0cbySwv6uY{rRpFt%>oGZ{(3Fk zw*>uzL6eJ6xHff!(}+7OiFdAYM&2oqpI(E4J=T@nK|qgBnp4}2I%wosE>KPR&@2Ib z7V_w*0iP!Eg=YNz!|trPCXtR@F9Jgie}E~&t1kj3(<*B8HW>a-^eb(Aem?(y+-3K5 z94+~oiBJ1n(;2M2%FLcM;aVkHCYASsXudw=<&h>DekPV647u5o*C*R-jk(cBhWW8p z8P_}mLlT>iW`xmmT#fj)2{*=oX~YCK38VX;d5S7Xrc}pNZDhJ#BaJX_o{?s25_awu zPOX&0vZD!T7^~P8(lHt^J<+OMTitshZuI-0;0J=MgrLPiMm^gOO@*;gP#9D1Z`g-@S#|mxCP`?JEt%j zpYnHfN2e%aC>9y~0m8JUTb6v|jwWP-I^j`sWGXZKN8#PY{suY}tm$DR6MR#la5qdg zfP})^O81ff94ej{}KN 
zM}L)r37T1FoSh_1GfXOte~;FIYZ$&a-%u zAjpyqdZNz5nHNf6OwnW=7%Cmd&0o0w$JdHuHL)DI{{6@j!Y;-4M`!edqL8$Fhie%i z=TA45C(nL`$#X`}?7a-4Su*_j#vWX5Uz=-es^+N<#J2Sf3lt{*yxM(YgB%@KTPS`T z-rhzV1u9|E{@0JJy9TVZT9))Rk)p8=41xa*vCMiVhzNiK4@b{)VNIQ+3Qr=;C3t|O z%e|>~?a`8eAzJL~@t_Ti1KpD2>h7Mbz*tsYjla|c$!AOf@ce~fwkS3xed6=4G(;E* zh;}p7q{%A*HI_v$X`>Yc(neI}M_>`B>?jOp<7B}y#Mr8*qanI+>B zX>5z|3pEQhOyWlHG?!z4Er;`bL*FE3D`0XJATq8%;t9nFcK-TC^58i}rW?d)8XH~F zwv3zD>Pgn3-%h{1wI!*}?5t{d<%|kFZ-9zOjG6|j2bsq&d^zJ7KpTde*rY;RI28{~ zgVFc`a-N};kwSJp{<5ayV8>9?Rj+OlGmd-_{Od&9daJX@VV%kn{_4%`wz`&}jaCGA zz!C|{I)Rtmx~R9wfpI{w(!`_EFXy*Y1xZne`?Z?5RhHN( zB7o{g5jsvc@4)11KgcET?98=*obky|z{zFx+b$yvOzXOZ;@zC2`4RXlaIsyCm~?Cb zll-spB6*ZZ@{%=-by^{zv|2rv_u%+vl@B7i&7lg(QOJTCt1%5Xxo<6=dh>~KfFJmd zJ@Hpw$aW0Wvj`NC^iIk7 ztsHb{;1MgU6rY-C*cHwPrkeor9muBsPSsAtg^m#~f`n7s$oki#!>ESlz+pcKf#UYH zmy}A$7qSE1DczX*{~yOoJs3{WwbeAiV7ytpQ-H?bE_idG>1RpUvJQam_XAcpq9NRH zMHBNvkP^<*iARE7=hoRhJfl|O{Ks+3d6ZC{wnF|@PrKS zp$_a_q#}4c%xio~NrHPi9J2pZ_8RkIoi5 zxfylVgq|AqDkwt0WpN1Pj)IUA;)3HP)o%H@ge#u9A@=a4RV67TXhDO-62lUsqWem6{G_ zO%yycqV#2O1(QJL6Nrt>1_hJmdn~hNoKKg8B5)i69Lg}qLCJerbPz&{+*X*)pfF_n zIw8EZl=rQ>TDai~=OF>%ACA0JqApJb!|t%jn|hYrNI!i!s?q;`gc-2mB&|21k~es2 zc6!`c$a+5IEB=btYt|%(Fh)V!MhKOHCx7xdy>0fC7+Iv0q-r2Wxvd#6(Ja9Uowi82op?R^%5W>McOZsF9eL>n2v ze?9K{k#%bW-x#!j3fjSTAUb^SVuHh-6(~7f^KG!csq^vO_Ow9*x^HtAM?f~50!CS* z5kV*TNG;FA|>49ZjBW<Ip5TO?kw+vNrNxE}?A)Esy!v@*{{x>C z?Mx!==z&zKTP*gD9~0t=j&4EQ;FV#K8E#XLHP4&MqP^q<-X;L_%{Qa2kohMS^nW z!}YCJ?XEyW!N=d*Fh+Tf3tJfAd0GFlT!4QV79c3})}T00<+$!niLHmZu<%m!+*l$2 z!G~!M9&z|$dmSyM%Q&n}KqDjdhy!#HTdjo8#Q|b8J#^FP-2|EVEqm(a$nhZ+h`U?6 zVxL1O9gKI-&O?g74d%;0qV!SNv?15sxQ$7hJNU0p{enhTds7S#<%sU7rkaoBp<1bD zC$dLoXZ}fT+F5oI?xVXug?O*)Uovv%@|UO@ZsAD^*AYb3fGLs`A9=kC%(`Rl;L~1R9CtDtjecFw%NJG|B5? z8Nr8iyzxpd%JQ{7&~&c$Ud@8~M=iyAVUD8}C)mq1i0UI)>K4q`Y_iCjMHI?*x!JTWq87!@ME z?A`r=mNUXywVm^;_PrK<96{;0rpO~4mM0c$Akw9`KaK=08tgfxnWAJ#e(f~o6ca#^J7atN6;%>e({+WUK8>C! 
zl*hrbF8IcuMabRhMop_M!`v0aG)&Tcit$t4$C9HQC_yk>#4YNv`+L}O!Mj+u&I?i< zess`0aJ0VgPAmnjl2?EQZa*Cq?vTnOm!t{y9$~|Hz?8^VM|qY;Tpv8ZiAH%0Z?(Oe zV=_a_0NU)b#X!>^38eq^y7x+}K6AX#+^*FN(=Wpgqc3TxkcLZ8c`=tm=WBP=9kbqtQ`e<0k`96!eZKK#as0$~f4QG~eujPT~!B{>&pAW%Ua?>pTyTEeAoOTW{U;&a-x* zXTg|`@@@CXvj-cM-UP_7*fZXwKj4y5BbK%knFECf0$z_otp%x)M9v`F0f%&}=N|^c zzKt*xz@qK@=QS&F3g5#2^Fj#~%?_qdJ_dQJW;|g=c3Y|AYzKdiD;aC;)kXqljzJ~b zg;*lgeQnL_(*xkC;d$UO{V9dR^G1jb60rrKytGjlm!$Bd?l%xWIu{`J7=-eO*bPJt z`|{I72Qkl=;@n45o8gT+*v|7qyvtTmWQm+A9}ONS0bUx*ZlB1nL_{U@GcipAH~~ad z233d03q!c->PSd~dgEeNig*(sYB3h=;b0|YF5#sZG$TWA;fk|vDXG~0&QTj^3&gZ6 z@Q4IDYs49y2=hULjMD{kT^4ka36LbVD^@VQg3Qd}Cn6V_l-+>)F2yR0MJl)uc-If` z(Nf~+cm$_liK$Svei#!frk(lni{Mr+p|Z5QG?-LRiB8XE*Fp0bNT34-kEG!AePttLihM=-W`@)_bo#6l&He6^m1S zDgN(@0`F7nPpVjm-No^8r4(S8X$uL)A3S2te_HmfeKxbYWw<<6xgFG)x$K@=i%h7C z4)v<_QR2|MAiMy&56`iHLt7d`q2$by<9$|=^6yNino&qCXs(R1aM5ls??RR z@NmB>4lZVn;A)!rm&Z6v!p6p)_>6ClO*fzhvO=KgFmC$}B;ceX!?qq9;=d~p7bGk9 zTKDh&z@1gF2)b-U+NwgEo10wZxrTA(gx%&BYsd|`6nFbbwVtMC{^6;N6RS(|t1Y~> zpv;KGzyQnoXVrK(wk%F1i=%F+Kt20574(6O0aP1zbr4wiM}xV%3~5fjt=#rhkmlT@ zx07MOAgtTIDfCRUYGN!ut-2xbHj}+N3NK&tmzM@CmeJ=jw!va#0qi0LWD(}dGPt4t zucV<<;36)E!9qUhst`DTRFE!;0R*eU^&Cr5S92o-3!x)LL7#vJ1|uHw&A4Vf??p9CGS(Qfth7QYs~wKom_o@gVU@!~UW@CB+nE6BEa!iY?%mLf;g2p3TAOu< zCW-+H0FsLcXr(N!OTW&maJChpfP{7qq@T&c(ayvOpUdsmleZ{~QKX=Z z_wd8$9MNI|C}53;t-xDctOPwcidP8E2*oFME|NT4JoDCf{=|A~QYYbsF#syg+F3XpU`{^;Ytg5Pt<>tg53kpqJ`{5+X^t6qV zkYW)1!yH6uCbpBK||@*4ZoSR(il)>$XopvY$>u;>92<#Mu~~6P)Bo zAniSrw-CV})t-aiQkptP&T22?bh;ugv%f8eVUclzm3-@ZTsJP|@yUH|54ooUzudC9-`X>iaFe}~HajG~MJWVDEOhCZ-0$^(<-Vowt zmqiM?VGJyU^8gcRSY3u83Y0sj+_9fOVUJr%n*gk7ak_g-4%k7txZg$tWBOz+j+?;Y z0-|8#uEWKu3m~=uWq^DJ>P;Cn(N)5!=adp0G|HIHfD&NPnaHBD0Nd@hUe3_nvnx04 z?= zqT(<;${YlUbs|56o;GdLPnSkl1ZIz?boY}1LEstBJ$(IKIbF`9MR+WG2TVwAU=Q2o6t>PbC1-H`gB^g0o2KgNc!8|WQJ%=L64lyb0w8*_?wO%$4dA9U5!O1@n#>?I_tw|} z(O3)YX5^NCLsQe=UQPW|v+JqcIUwPwMh)Z=@V=g^2ITOu5M?^)^cUma!kcY2c(bxg zjthz{2*WhdSQ~=4JDJz4eC^iab&i#!q9^QH*Kbm8Nn`GN+{Fh;QqSvYRxQ;pGLRhT 
zYcXl&FJc2JWf`KmceB`~_|`n4!z4!JGxLYt@cM{1G;ucDEbpZZ2Q7Ngd2FLPEB2 zP|=eY0JVe@Mdj5Tm8moqB?QE%YE{<^CNB-%X|dDaiI!+XGcY6)8W z5mvJ57^3jZ#M$s6-3JkH`hp^eV!Q#O)|M8L4|?`G!Eddv{N>Il4mriLHQl~tyjwrL z$>ywBk)iQN=bALvo88mD`B(Z#2bxe<<1c&A3w08S>^rf>0Mik)=wLDZ8lwhHDr`3% zhQF*hjsojD0jqJ&R9xb~28Ai>od=txRm=D~9F7A_p1U!K;2?ywjbf8aF*Y+ut{(J) z^MmTj4wh}+(t5a7zJ=tfvPSfeHeuZxsIq9i0Wy|#xAmXf89e%h>V6z`2ex21DOkWx zw=^=?y!eNWV}v#2u(Q+JS?#ieqJLUIjoNOgSG)L^%}vsrmY zi-oCO9b1gar@ZNIsi+92<#Ft4XZD#OHovS=x2+)w^E3D%KhWDca zKn1PVnBW3_$D;z93dE-(#k;YHj(&(Unh1tI+yzQb8CheNKnz>y+>a&b!(Sk+1qg;2 z@_sVHz1x7KtIJ=o-3zJ%iaq31l38-B84&<)F+gS6ykkt6%`e?EwnjXWyXAVq zJ=sNx$GL%cc3d6m>2Rr)6RcHoeO9cTNnwFOLCC+Mol)EUOtrLe5zo)m2vJIxT`!3F zttj7bq7GIu5N&_HH?mXTDqPG(qD>&?c4RUAFT+;wkZ_Dd3fUc5uN9Z_VAi%+4E`tP$%i!H2t4t`qPGWrtqTy1DAQK(*shX7qyshN ze|$DM+>>Tnsot&U6NkUS%djvNXgP^z2)5;TJUc4Ee_hP5 zRx>w2tHjmCBXa!p8ml=dP_ONx2y^MU1RD=GOIhs!Erj&_ z8}k8i1O^$O{J!ro?`J7qokt6jDdHWB8oR_&NubIZruRbv+MR&ixi6((^Q}fb63_ZY z6ix^3MbQ{PpZI|!Ur1^49{pY^gZ;N^or=aRvGYv8lpw0?6s+czx9a=Nwc*Vi>&xD*uVVvdk|DDX5bJcVaye@fM z7v5h2ryD(vZuJ0Xex7yhjTjuY_!uA6=|x$RjuRxrD0AF^#`Y1K1rEEO93Aj#%K1ch`0(u^2)i~PO;nC7w&9H z`mtg5zk%`i8Gf9xR$J(-_O=L9^?zxIw>|sA%3Ejj_-#D7A=+bF=f{HFdoIB8YpjM| zejx#NRJLUKFI<&m8az-O94oY7R0T!R`kmy$Q;q_3)uNM)9pMBA`~!H6o1_u4YM zQo`ZFxUcY^zDmvTKA%5>Betic;&u(X3hWfI|9}~Oh^d1(KEl<-9K!Bj-qC?-i9?ud zSkm}NVacWc%lf8*CQnTopg*f_ULanW(L_9gE~ODcbTR<)^$>#m8-1Ed4^7oFOTlVw z;PZ}U%5ggKTocgLRM9HQ`3k2UCw(|-$Bs&Ec61{DT#-s!p6BlFf z#s)QN#(z`Ls}%t+ozqxnsINPO1_XE`Y#yC;d%`%dQ|U@2_RG4%kkx z7tW>v2{xfN(XhYJE!}M&znxqywA6!|#Eb=E zbL4)B`aDIi?lq$RKV|ewHRag32{q+IU5blkCeTW3 z{PoYMqJ_e0LN6uUpHbUi=~8pI>t}N?6=ek_M1N;IEm_!4~M_EzK9p_M9 zErOW(gSU0`OD$$E{6j8QpSI%mv*8RV_#F%&K5GbETC+G_<6@O4o(c=LNDvBIy3!|x zv+$Vnq@0m`RlN4We9|aq5h%jNEjAny5DF1aKzu2(0HWAmvcBNf%EtBgF+=HN6usoG z3eM+(xdvWmtr5jIpQf9IdwGBWUs~EgS|*$SdzI3XKdl|>5#|{IOxmUD4hZ7}ic12S zLMEQu&`VXJu)u$jdN%Thc9>Sz2m5G>ORDE?R0*cq9>K_6PDY0$*+ z@DwgUF&2ZVWsE7&_fJo3T5&R}W1*#8ZlM(p5PWF`Ugn@x7HJ&`tVp(*&^ddySSH#W 
z>|Zjgb}t+<F?_h8Y94?tz2^&GSf{7ipn01)y^kjeXNYsZ zl7nWdESgf{qxuyU#7&3-a5-D@#lSSH=={@ljJ_W36%=7naEWtAJh-e%1&MD6cR7JY z5G$EHIUk*8OdlXlBwE!D_l+sQJu3+u`pOkrD*{TAL?84jmGp7EKoFA1-u8{33uodV z?+7PsnJ$hX+bm^7q6n4UbaX6pi`DN|Bqojpb#BZ4l$n20Cc9RUF13-4isi;#`)t5L zMk)G^NAaHHvt5*?Lmhg3sc;FI36J?>lVxf%jf2$yNMg=rbTSWm)#{RS%wlU2{|*24 zaMEy`x~*wjJiED%i2RX#vwr(hE(9SVK@F5U+316aa4Ip$r`c<$?C43U=%^2lDC=;j z{kDb0LN~eqT#~5LU6h}@!`+WJSOaUUek?o+A~8LmqpxmEKI`%qSKCLSk#s@jSh>zp zDqbroMyCu9@)un3)sf&5%vipI58ZsD2Y+lUx7IgYg7VrVT{O89KH54^yrt3&ycHU0IaRIS5`bXV6qx`;qH!s3Oj*P{*~^gm7pB5RC&&^PNAkSvJ@kp zUkf_vr?M`h!@Ya;Tvl4X%rTkSOL`h%E-}yT0_3YDJUKD!sux2NTZ|So#EqB9-zN&eL3inM z)4wThN;1L?djte1b(sKai+19a6#f3e-Mr71N63uJ1iRpRS2;f}kILb5x2@Eh$_=g( zz;^o!luXBJJe-$8knCcWzsjCUKZfI&GA4@##b&UBD%ui4r*5`q%J2MY2JIRFWW)u} zbxzZC(=v*)Qo|Fr9>|9*qktH03HL#Tw8-s^WTxM1c) zjLqmpa2nwHL$T@n@lh|&{)A(H7nb*7v{`8boN{ZW2*)Z0&_@dl=E{PBE^GPDrzYo6 zPc0HWD(a`C7VLHt}8LCiO=TqwBW z2h0tZO$+s%Dx|E5KO7Qgyt2eWgUVmh772js_kH`~xI0$Iv4S~6N1k`yo@t2y+Wd() zM(GktkBt(9XCteHZxPtc1bL+UzP?E_p!_=foF~tnJy}ZlAvPIet4rX^!)vSlYr}U1 z2-xB$t+N$THe>g;VI>?ej|qwmoq)x$S4 zq3;D{$cUy_!rcMJ-cYN)W9Bf?g=)aOZvB72(c`@bCEFA zfhQn)UyAAh4s6gS_+wOpFlo&TM`d$_VU+;L;X-cEL_0ZWrd0&;NOVotY8XZ)=BlK5 zwi)^W7egi#N-D*r?P_|a2W+q+{747C9Ihq$y{*1sVXqK)G5j?YBOl6YH~&O~8pIy! zzBunT($3mEN79LKBr?b)gFO8Oxf%_gh-nK=wz_JZ_)9HqJG*?5aH90&X}MPNw}aM3 zC!e6ovlPVq(Nhq1yxhE^HV6z`mO)^Qc4U0jiT(JXJ>F#$$^kUp!6$yG(qR(cM3D>S z+=V*pX8D+6QWKd+yd8w~B1BRIf97<=BcrG4Nz757d3)u27^shpM?^3p$MOPY?l!;D zLL9{gD3C`*87M&Nk-;;g;FIT$_zA_L{Wt7R94aCl6c`8!qo{FF7;@1u28!`CCEXl! 
zVH?d7F-|_vK>ItLBrtiLuGRu1AWqf+Bu1-iGg!fNUaqZPi{3DkvaEHq?M`mm^{DD$*^2rJhoZsu$Ht`_L%BajSD1L>m!bFBk3 z%4(#ET;zqM+{qS%mbZ?$D(KYjtoC#LmOviPq5e4_QO)Cs?pP*9>D-z)wy|7 zouK0yt7HTMDN)#rWiSi#sd~`?$DCsU2Xd>(0^UR2Ap%aOsnVPhZ2evBB9>1+XnCO$ zybj9cBv^KtIxM6DY04L)OnRQwn2@JLMDzG4Q(IL(WZ*Ag#xMnktG3tw zq!$SWnHv@?<2I7*Ttltb!~dP*$Pq>O1pi!=83d=5AImsxwLHf^696yZ^_JvU@OHDX z6Ce$xQ4N9Y>tbW4DBCIb3=l_v=)<&2zr7t`2g&vknJnu3T+*${QXH`Kiv!xNWjFSopi9b2Iie%Psh2KVF`e1L2$4FYc zqJUC9N?@a=*YRxf^q?w40=bwK)SEL7MH4fBFmDaU^ceR-tVedQJzdWTgWEQ%$Uobm+MQagN+F} z_u7!?-I>{6LD_c3#0vJ)S})x@y*2${fg2-D8!CgIGo*%|%;t3CJx&|-n%JIWcMkX#nXRf0-0iH;p{vQTo3x{F>#=sVk1K@J$z#!nwt`mYVK2@9V zClgG`OtTQ!tV~F$5|maF==&FIrC&>FLs9V&^%zknQKakWbGhS^Dj&-n6a*cyx2_DX z^1^s>N`4_Tkvf&_N%+b04dC8c5FC0x0KshAj300Z7nV(W7oW4tDd?PFD;>E4TS9#0 zGB{@aQQGN$S~VCzX$8QrMhSk;QA&f_fy@+*I>{RkQl}uUKJW}}G8|PBXMEi5 z;b2Pv`Y=cYGJS~1;Gkm#XQD86JP>7nEhdeNhJOjES?T@90`vEFYZ(t57L)XMPJTcb zw-db6;vieyfkio)8oS^bX|+5c)%v(3F?gGiLUh!34Ow6oXvymEj+Shnf_nZT(W=Gimp$G~-)IG0O5R`w zz9Vq`OPh8!B_cAD0`fc^@Mc)PA0NYAiPU*Z7}iy>9;of}Z{mE_2Y5RsxV1zJb11ZU zu5)J(ueKwNjDw8qr<91an~W94r1vfeGm*}&=PoI5%RTvsojjv6DZR&G`Wvw!UOmM_ zg*$mVfJKwUS)!EFYLsou@RSJ4JJ3xrj9yURmFjDeRz{-PJcgH8x5L30 ztz_RcwrJA*-yu)yM=k4cV0>Oov-K=;DXt`T zn9jZAyMueK(m|gqY~j2WZAoJrI?>8^btgYC)3K^0rovu8OAMMNlsG+SrN_bSsR_>$= zwKDJGUaGUl717F}{fldG9z?&!1}3{Vwmd`s&9zVkP|^UZaa_9)jbk;U`yu#c?C)n! 
zd!$#Tz^d$s#PfwJSAGYgax`!%>h)%o6ifAWlA<~0JX=RnbD3F+_;-;w;dM~0ct2YJ ziHl$>3;iNaCr>j)NrjFp(4{{oCMm^xPCGKtq6Htp>JP>Bq1sHmB$kevP^lcAN?RiZ zqD>$H36TM&DcIf%A#(relo31bmY+?{eS&C$L70}#g_8>%Ga)86nxEbD$r&%Zh%KuD zn7$7Z-D$0fJC$%;saDtnP@rQV7W!$wW@E#8G^Q`|XS!GaR{m`ZCECYp9*;SQk_+tS z=p_6zbAGlmBTB#?317V@;YT~{+J6I>{8D0hF4-h3#zC^+$o2N!C2~gUFKN);zZ5Sg z+>AjOjRslA{A@EmY!T`JQ!r=mjfxMzyj*VexU4^PEh;JF1A8>(Na_?fMLedfktZs= zjtIC>cr-2mw72^N@sb!RwxTgsRafCOzO!*R`|uuIB9|z-k)m&j*b*QXDFPW56D(Du z8^no5FU*$d*28Ra1P?Q7Bwix;=pBSIhC-K_geh{Advnq$Q*XaJF=`%<)AX`g*jgw)UJw@%&DI_WfLPOMKLZmb=v}gWy$kMc+p5e;;i6+u{ z7ZUru9U76k)+$|;eq+*$aNE}6ES{-uV3oOw)xoh50Uy+TVxUCFq4ewf;a?kJH~jz| zEkdl1@D2(bOjv(0(i~B)B!U5PY@|$W1Z04^j=qQ&5)pw?vSx_4Vg9FMG_XBsRtz`L zYl+~%&$W<)p8ks0{Y&G^4X4gr>ysKx`I4g%VSY6oJ&lKhwZlZ;!BT;kgzHS>tlS2I z8Qg64qJyKnhbYuz(eS$$QxrS~=xsGxMRp54NhLU!<}JvtJBRwVLQ%#k1~>$ag$J3X1<>9l&uMo#|59p=)ns_P8)m z;j+4B0icVq&q*YaB39GY1>pAl5tnegZ14>tOqzG6k{KjczpC%AFxAi&?(J=T8eRZ| zxV^TQm0)cpW-P9^;N^S*cKIxy(^pS#-S*eG`RwS5M1)G}Et25rO^jkE3HQH)^Qiy2 z6gans{EiRQF>Er4oQR{c6{d$;!R|$q1l$>i&FrBacS;PKcq*DW)CJ#(r4E?g>IJWg zg>DsY`SI^xJ_1XKKm$_-ZZ&59a4}5fd%rhc@?ICgAi~PWCVOIoF{mh^KdM`8y8nC3 z#;1O&9FF-qQk!54Y;VB~s#!lMB+DJ`x7Ha_h5w78M@QTC6e3R7!p*9E=cs${M z_XpM5vBQyJX6)AP@bLLaP8d{aDF(B6Aa|Cb@tD4&KggNBMRJm$=aD-*zMHlc!V7W~ zi2;3tW}_3(N}{RHVN^8OrrH~f5daEoN}F1HYXB{a)8KY|igR;AqBllV-`&?HW9t;lV~ z(Ss`|)FKY76u7P36mQwCCN2iPxqbm~H zX)>;JNpgs_)94yJV0UQ!Q@Gk)#o}KNZ!!NaWnLTDXiZ%HFx8xEfYcy1moP9?Vcn$_ zLY+c_az?2@N1`{$X**NyqCY8f-jkZwry~CT5LU+(d(*Ci-W_rH;lN2O;OV;rj-icL z8YeM)hIW_OcDd55BsgdmonVODjHI;rfIshV@;1ecJ!iuMPEwV@bQ{7i#`XZ>Hda=Y zYctS9=*WL^={U5THx1;L4Wb26wu2Z+6Q!_r$ljhl1~W;LM?0tJMKH8K>lmwNL=A+7 z;;>Rdp@J8Ij9=;S28e)te2dUO#uY7fD(;UKmsKy3-bAG_)(`fjX_^s6&)N#m%svzl zrBPOIv1F!f<@MiKxHA<%smYLEb#)02S;yrNOwwrTu>MG@J)?Oh!Nc|qnfs6aJg)t0 zk{gjv0!;U3Fc0~Kw>9i&f*^qca!Z>jKNQE^^D*K@*j@9Yj+6OD?&X29l-JkT^=J1V z6^6{;p!qcyHks$+a8m`mA?|TQ6&j5otys>)VGM3Q2*0dseIV!#lGf9};+!4>9$Q5U zaZZtnWQhf8aSFVN+2B}lna2$6#FSvuVg6J7JG*e-BK8`mg^tQ>!s^i={yztC_%yZe 
zN_`Z-_DiSRzpoKF{hDo}VkTm^(TbjXX=#=fa<+%#V#3?&q#p`jS&fp1gm)l3EJZB? zCElK-yG5%bKb=Xq2{57-l5nR>#l$v#AL-&2n|2YB!ODNbd+LfSen$Qqp9mSY0?P~h zrpJOT5R~<#(7k)x86UYj9I{@P86Ui{%-~gaZw&JN*Bs@~%~wezB%;OuDk9)sm11BW zjX9I+?M(Gh1{TVMSgORLUYD&*YywPhpnfKtG**G~F%)-%b*`3zE;}Z}C3pjj$LA?ya&N4!ebxS3f7@k|zn0 zHmah^tlS4%FPy95@A&4Wrlce1`t{{9_ykjyCqQ_&_MIkm9C__nYtmTzp8yN)>TOMU zHTQ>f+-elR^7^`XrYn)!mdF`6hDRtM;(Md8DkmdvvwlJY>+|>z+!YSqs@N$ZIu$x)sEZ zEfXE~8lfUBVb2EqGPRnev z)j*A&lC=@C>!>OCVw(*iPf02b|BWWVMqrAo3gi2N8_XbXyx;81jMjM#Pg)ATJCH)> zy~|IzXCKbu2z~Y|v`RmyDK9Zr%3Olc_BkK{27xQA*KtLfsKZU|#=h`z$El;m!8``* zLDvIH5G`}b0%k6?W4S-h#}UX+t2SKa3h@tG(wC(F&1Lye)+<@d{j7~OfPx*iwZ){w z041QsQMGK*5Cu59`_}>Viov?Vf+hoDnHQ}FM^>7A#`u|unj(1xxPq+z=A1^f(U-?d zzaC@nL^NwfEkdpfM2iJoAi@AT7-7`o10DC?OwY>r(iR7!g2>1wgD(#^;k0U z!+y#qeNif$R{l%FRR+D$6LOMA0v-+fN4+l!zd4pwrUinH9OI3OQR;9+836s`DDeme z-EoQM$}8s+!JaJPn1H64n=!t$)}oumE78xQen9Y7%-;orV^E zf+>cU6eSV})2WHM^!piU3=f57whonwA1a@TnMJ*4S0;{z-X<6ZGjR?*E>^afG`U)4 zqKKbZ8;GR00hn8EaA9uet{iwH5IAIn`=}vdIF#1lLr%gJs$(Z%W_1gM6=E-_59i3D zun4(zmKRU7`K`PXf+WNiS(rL=GeEO+QXE1GZx)`S;MZ$Kh?ihA%Y5O&Ys6rfU(8gJEx_tFhA=p^c>h?QJ z81JZmS*i6aQ)MSU$!bpX=#p9SLY2`WC5%}XloB2tRdI1!Yg2!Wh%VPItsV3RfJR{Y zIO2M0w?aS=dM6t)P<58qRzqlSA5f~sg-HMX0_6K8t|=(r@B9~*l+cfZGS9zTq?a9b0jyqUsd=L*esotf ziJ}r-5xGNVgIBFd5RjI8PYqDG>oiZeQ-CBC2*0X`PJAO~axj|~7nN*8?kBs}hjeRf zv*&_kPRLAV)E}_o!hIKJI95|3aZ+>N;$_-YZ8uYCVkA`XFpJ=ifoqCv^&N!PuO_n2 zl0YHF2B|1X(l&ugRb32Wc{T3N7dPPqN?qm`q+*sCo(gX2*r0C_!90Z!EtYN8PkIkp z%gb4{bDR_?cmq^s8A}j@37C+-F`fFoB;buAYQdTWd8tVSCmzA|=Rrcar_g`UwH3}y zR?|sY-#OwkqWS|OHqVMimX23xQzQfwTd-ti!-yFy@g(k|E-zBKib~fLkaAZADRNZ0 zu_t%WzcqMcLwFPu?=bv6NrP{kkolW}y9I|97e-BrWNngMMYwX7+}2mYY#p>2BRx(8{s*+0lb(|d;I%&3ug=}d7jw^v4PNdO>=jAs`s@-f6uiEoSqrv1VFA#i zD040<+b%lz;|hU?4-3p10v`waCcn!lb&tZ`#{+!p>lRHONWpVaXW-BiKO-CAx@;eY z-)$}J1A~pPIJp(n$f%&mg3ID$@|{p>$0MYEt*m>L92hhx#Km2@{EP2Bc?NgU5ychO z1O1ZZHJ+0L@`?=luaVLRyw!_r-+1t{wsCYGx%>IU(H^gsVl0rOJlCf^%$1SQKT_%7Nfq@v9eQ0L7@SDK0NSaKZ0ZYynUG_J zQuAB}ufQBA1l)mav{>ZWpltExO4iI1U@8dq=OMVl97dWXV 
zvXW&fmYi*0wVrN~$_DJ}$x>|ODSo4lYwKSqWN$(g1&xz)@+&ZO)JbN?40p;0UjjhY z7U0PM2No?A8@eA0kYV$Hz|8VxXEX4@XS%qKi)^*3gbAwqRT}~>+$KS9YyV}^x3oE4 zv-D++XCM)niyxNLn(BO;Gl$zaNL?FJOah-FVMZq%O+)rMf>6-8b4qy~Elnk+1Jb*2 z$WU8`pui5EzzfJshF;`U%_y8CO@e`>CTMPPs**{!o9|lGtlg=b;*!h|f`aPS0^=l4 z-)=!%N(w7!;STf;F*@%}!0`$+nZE_YztyDdADy}0&6=$K4$Qpg9mBg)E>P&B(lAP>@g zHXYb=oh_$U{vf~d?Y(qSeD?uoodeWG-n<|COeev}hqkhJv$%}y5?x+LL${BaCXmP4 zniUSNG!}f)Adn<`IQ)4lc`46_#R+#m`l)HV%x`Pd*Ew)(NgC`Jc5xmKF`K3_R5lsH zY1+_cznV`GoS&ey%t?30pRsT>$YPn2G!=x@&=M&-yu~Q|;@A%`k7Mu&r)hbk=2bft zEUU0E3FnU!a{5F~7)JI{a04$y8?_R){Pzp|br|5qdn7q@4jjDnJvvx_3&TNbZvNUg zIUuF6KCMeS6*Z}`wO_8)Q>?ac<2o?RiEcDUa#neYQtvO%J7o2o*7ac zGZ}-+N1BpO&?L0cuW81Tv=%uI0uX3p9uf_qGkHD9?s8O($I3iHk!<3=So)wu)X*(% zJr7-|B7GEka)Kr>0XEQ88O!c?= z5>26EJ1r3l<_WsQST0m4P-Q>l1+NI?VGN_SG)#$~IQlVCCp<}SnVu;T8-Ii3V#!%f z)0$b`SH{zY`Ndy;bs*JL<{Lp+GG+5Q3`(LKFz8S6g=%B|o(FZ!#BNv;mi_sJ`Th?~ zYHJ(wBJy%*?EZ~pXRS%UjWrY1;^JF6})5H_CE3`J=8gP6 zNEv5{Mai+Fu)xibsqAUXDKflxqXl~KS;hQ>djbd@`+}W^9Deh-=(09~KTbqDF7Gp=RDfnUnMt%{Hqgr58El(#u z1C{2vE}$`j(D9g8(jeAJV7XV28?c35SyC1Lty*HK7FjI6*QY0d7H*_+>KC9hVy~Z; z7!9#r#tU$EzKNXG&C!H=@Os2j^J@FN$(Wfg@v2CTLp zQdc2kTq!L`p^7l{fQlPYk7ZiEo)nGtNIM}Qo3>viLTl&D(2yB1v>Yx|Q3_dL+946( zk%Jx%Ab~Tzp{xa_NcK`%^@2nGBD>Zecrmk@#jWg@eY1}M7k9Z$%{MU)avn5hcR=27 zgL08DJ0_-;#mZfWfOy6*A~XMazDTgF#gIm@N5NvZ<9TX?XiQZ^1nnC?)R5Rr+DR)J zVwE*`XrCM*rs`xgR(aC}bG=J9(#0! 
zp7mJIxJK+Cp03E*?uv(Xl|i(nTpl1)-*Mv+E)5NDrL}Fk@XC`A+!GrfUE|&SCSOPh zk(I$1VqV%2ZFb2f)k_WYDTl?u#fH>rfyVXtCV4J%SB&^bt`ByYlBvfOjhsjXd!k8P z(#{w`HeBSbS{hGi9liu#y>Vj?ygAZhUht%7s?qU zMAMS=-n9!R@3?^-E7od*1zeX(Di%V3<2c<1g>y!#)$6pW#LD|tiM)xy0`6oHK`F`?i)hRb z^ESOFHm)B&mS zd$!xT`kE+5!8%_<04KPDg)-8S+KQ<*2m=(WLI)2>XeEfHqj&}F(oR_Y51(HP$gZkW z#)PRWpzI<}K9~w1dJV+A6VO4G{_O<*V4Ic-TyY0hZO@#dw<{#pxt#yIw2fc(DWKrn zBxy?XyODO2xEz6A_r~)5Y~^Td*X$-E<%9PIlrEkXQA;|EYn(teOXU%Vpg7~wSJ|W2 z`og(`E0G#%Za6r)MS`pG6As-b91CxveXoR$U`2t?`4VV^M5?^J7hb)paG(wN687UP zCNL9_bJA@MCjI&sk_wmo-V-+92{BQ}9SA*>^@vnUHj(Yme*15kzlVzr8f@E9rqhR) zb26R~g|S%D)9Ull>5uM9499tBfSYHA%!kwEOH!bo|LxZuybh=u-H7rHqg$nv5YE?% z4VMkba!YGjgZEWFZ{(Lxww;F=lxIw!BQdMnw1S}e4ram_#R3cMGhF{ci&8>xBH7|A zd4=&ve}206V4gA4V{X1lyxKs;5iZpk+Oijvp4Dz}%)8Uc!toG5DJ5~+;8M`XA-Rzp z1u#--bUZ8dP#EE-*!W3aL+s%Xddismg=T|^37`o?nE;oO(dp6mGHpOmi|UYCQQ>L0 zqNap7i691_oy021o%Ft0sDu-iQeqo6*?H#%taP`Rgb=+3N%)$lHiA6R^0I1E7+CYy zSw2U@wZ=d`#Tf*PsnClg3Pwk6xNn3tQ#R1){ISRy<_s%>4RBHJ2(9k-s555^YEfEg zQxClu@(-wU*#r&kV53)Zk_g zKbzPD5kfafy)i*q*xh&x&ax7R_-hC8OkZwq-oIeiTQ4CVIQ`a-f_me|ikAGQW zD|z#u6}U|iA?{blSlf!P-yZtMW{?h@cExPCIMr&Y0i%AOQ1;>#h4YyAftZ2q19evV z^N4Rn#PQ7`@xUn-cs9SL!2H2X^B_x+8Ewl+6}y_E*Cxu!iL#>U`cFEG zHxW8D&dK_jEio#xCQr_LGCY*ImR;B_7mg(0vp|<>vWkEc{8ob5G{6HQac8EE6lIt~ z&VKPQ`NN2R2!>4If^AOpfccj|zyXZ*Fd+7kY z#_ZyJD<%x7d&O5kK$-Y-T7(Oqdn2gbE1Dbpc854Mv9>Sc)bVZ9^aolhdrjgL?_ zfWeWt%)U+k-gHplzG@h3wx(oKu1MoV+0xSDsjDao=lAKfa{fk)8bMTaj z{)w85=OeH5vl>pNwLN2aAVYJ%QI)DpXgBYkRQwq{=qHahZ9xW}TZZYOI{Bs(D;c{2 zivXshdzso^Y3@Zp7$S?fFLKcCrqdftD^8S`uprHo3&f?gpunR%JQyV7CWT|}(4s|! 
zi?M@&H4J0tCv0Q?2QSbnG)7)G8PYF%hAz^K{SGss`*a9VD1j+jH?TA`MO_-jiQvXd zS6J=10lCAh=~1PFsoE=Gi`QZ+Wp~+OihJ-=`RT%0LMo!@RUR8YildR$s{9)uUDHmk zq$LoyWCF$(6_nIl{w5Uds+VIrEFEsI6cXfrr|GB!@1DVj_v(&>Z%S&sbW*`oEQ3m>aC`s2Tu z#gQR;=FNIVMsk$^7S}*cuP(gN>6}jB>1-?=tn`~(Bqkv(^@hZo1!RYYVMbC`XJcp) z+6WSV5|tUz!mxm^j6%sQo!*cC0wD$2P$LiOck^i&-0?pGZO zm$KawqJ2gJxq7%%RwP38mJYB$Lrb`rWT-!DtSBryYsHm$7Ly9{q8#LAD$4%*EM7_R z;lS3naS(R46LrzVWpy?_z=TTfu5_a@CLv-3xEoRnsfR8+41(33D92y#^RCInWRwCs zpo{|=kp;Cn98EFpQHYZnSlWP)=b>QjIHJHJ;9og(5XFgPs{5U3*d=$p3j?&}n~*~l z{t6N!QanreJU43-^S#`e1Cr}`j-e_hz~%)048Szo#~x+m_Z4tEeJ4^HEAr}f)y2gisf9zKmzOG4 zeveXXGj3wlw|B&S>?@x=PI03bQvW$njX@&n6jGW+k-9m^)g&PoIRSAV*n1Z7=jy`O z?Pc&{bX5bFmSP5h1+?0!E#Ps%Vyd%bTU<^B05@Hl(m~P& zd@U#QJTnS@s9OH=&ZyG84CJCJvBrX%{r@g(IHN`g+fU~$O>D-zhrVY)y; zA&i#wkfTdwijqDxpxg8EDF>Cl6(dwO6TH znZV0cV)p>u8h$TVw=@sDl53_mhg*<+xU&+GpJB$C7_DQiA_C~2)8K;jGV#THGjqK# zmc#m12O^6*k#zE;LdvKQGLi&E{(jRVfZCj?0F(dqV!)7*MsbF=yfaphN{N_m;b|1t z;D*{_JN?XYK)=IQRCD2G@Ie1#l=a+nzzeRuAVLBmoUx6Y?+rTD-LR%Iyu;oN?v#9m zx;Dzb^gxcHFq1*UbxJHfxu?IJlB5$BgpUwGtxSCoBZ(j;`}>-#yI%c4X-#kn%Q>I9 zvii2pW-mK7Fnu;TKAv30{UVB0a39$QB2`37j#XAw(cQUT(x>!XMJjAVR-_BBmP$A% zlzL7_83d(No;6rcs>ufXA4^Bd{hUOC)q%Ko|0AuE{+52qV4EO`6r!M_UAM`E8Egra z2soXXbxUB)5DGR#(HKSR@5N^i(@RU9ST?W)MYAG8{fE#=Q>BPAD$9!Xgy7dpN9}Lw z+g(f`@Jj&!@nmg(vC5#6sl^gMg~S{o3sr577m-x^!$y$P`@suAFp`fcyJMmtg9Gpy z#hR~*8Kwf#s=+5vutF)2M((yzcYZB%A+BGgKFiCn&aYQ3d)3Yor1tN|94XSSHDQ%> zadFpD`fiKt>Q#zIvTtXzSW_vxdaPlU^F;XqieaL$91)iFrxf9@IMx;WSA&dTS_5tP zs-K~)3-)z*(V3Pe1Q-&6uLuqYdu=r?${!YOg`abM#83M76em!~8F`A{_1zJ3YHGPv z8b5}n?8m;sJmowmqM2t=ND!nOp5?kt;z}p!KY6HsySL*f4NGXAJZJkFdCu0JdAaI-(Bl`2X=CSPtW zH1aLV2?TjeWxG=780cpl-n0rb8j+f2%~5AP0rh*%#iB4-G9v;6YJ({lFy%wTG+DJHj@S6?$b9q_u)Af zjQNR79WZ%WE?K@ESsKbBAx;+x6JYUhrFDb&Mn&tVF_@|!ce&gys{lsD55oAjw1+n& z$RZMJ=<1f+7eWw~lx$cQzaUPCQ7}SgockYYaT2mt6?z$~#v%Ok;kO@2n=v&!1ReT) zweMZvf|ZD9;KZgHEE|F?GZDEzYxKxmJ49kf?$jdPaba?;SsU|f{uPtzy?OfcG7Cl} zSzS~9bSyLYfkUQziMU@sws6I>4KZ_altf|O1%R$d3mc@FjOd(ji{&Bh%z2FWAVl`| 
z;D&xO(qGpq3UaVo8D0+&?+T(7Vu0(Gc~E7{m6_{cRq`NePj1E{pGNGBtIKr76xZ;i z4|%^p70GyK;WP<ES#iG?$?uwS7x09bJ!!GOwv{GuXlE$LIc9kd@Q zxwPFjfp_dn3FfXZ)qRD$u|z&V{eJ=aZTYLKPH zBRC-Kjzv=S44`K`v@Gt)1NOuUpi8kuoih;UFs-eV_ELq-^+{6t<_R()E(hP^xajP@ z%!VcUE1!G3y{_ubk}5v92+IL`@Y9Ib=fo=nL94JM$I@!OuebL84+^fE({(DuuJF86 zvmF0ssHScKQ5+i;o9?!2w*g#n)keUJGjT0HZrvK$;L(sBo1}h zSzQ=sEHg8|YkM;J%-JGa~!kq@KVQVSX zdD8}86*7l9MpvU{>JS9`&Jik0mP^-@G$T%-D<}mCpKDz=Ve&yETgDmoV*)|0v|^0& ztoFUhAPovc{*FNpPLSH%=5U@l&y1%^;k1rp;;*I8_$qhxeCSw$*Rs%2uKU!96srKf zpFDcjbT=s5GRJA4vUDXNBDN4jnWJVFiN~$AJpI5?^b14t&9u;3KnKj7Q8FZ6JtyBh%kS{9~&cM~|$Mm+*xoST(1ZN5kQYpWZDQK>0S`gkHd)7x}(cX0&rZIjV9HQ zzD%+sG$V|{X}TU(R53~m;M6#f^QC;CWeC^&4*y(487m~?Qq77iOQ-gq8`Q`5p;g}l zbs>crFu5jGSR%9ks)fYDSfoWO5E>ssyq3Vxc z-h)Q6p{c7{!l8w?LZhGQNl=iw0_Y3}_~{ zbtd`7!p6J);|P>5p@3xGA9DHaJFW$~*38A10haK3{m%}3ue7X4*u@V+{bXHnt+bM3 zIf$o$gg49U1Mbtamm>O)Pko0?X4>J^sNOuoZo-go{e>&|P(zA}9p)QA>=cG+9CN(w z|8fH8kZW4*kM>?kq?xU4EB)#{`R6DCaMC?$;9h(Us*v;bU3$tir%-HlN+4NZuO8AB zpjUF=O%~GH!l7u78wu~3(Z%$jk6RD(tK3+Xyg=ZB<(q4 zD2Yquw!74w)ytuF002oqw!c!g5H{vc*F>PuZ+!U?Ro~fUfUX!@SQR~#prJBAcja`z zW+H~IHE9D>(D=R{}!EC_|rYb^PbG%7K!!}t93>#ZoC z<2v=I=4FLlp6Tkr%;Jj4PGdss7W}psQa6O0Q>j{x2T<*SMSr#OSS<}kmf&RMBb>UA zH=81|!V8ex`=51iWhz)`v;Z_B!RXm+v@2@-2tmH6ymc>ZjrvNo?l~5E0pddeAaAoj zWl})_Tqm~xw%;WrQekI_xp3uwK|(8{I@b5{?u~##F{;tkR#H4v`u*cqs=$UcS0Lfg zWPi)R!gaOo3nE@ykDi7dcO=xn3K9lQkepZYPwukAtibVK1C|= z1qp2Rl{eNCl_J|XEyasgTM|;b0UQp9)QfbEr!$ta#I5ruaNJAw3J@~Ig1vaM$e}mD z{ET34dX&5{-%hxon8PvFh8wdmRq92}G z{tTT1{6t89j0@HXFvAAD4DBZV^Y{p2`Ucy>f~x+Kb=X7{IF$=OE_T8 zu8_x}K~0pj<%>eka1|?VWcQFg%rL+{RkD;Nr>1)d9DrW7iW%js3J^Ju8WBkQ-K&OX zUYr+07ZsbHrO2#`?1-Hix0O)>D3n)6BpVoDC+HbeM0>kZ=b`%x{(px;b--$#iV~cF z6RP#R856Z<^Q}gtF7Aao2;;?w>A|1FY*~9f0dw*v&J~Cs=;equ=Q6m&otorjINM(* z-8zHTSnzIdJ_OHye)?0|faF?)KtnZHzcibF|H5wJ^TgF2$s?UK3kj-GlMG-X2dB*e zn^djyxtZ^#ZMTwcQX0G|K*#vpkUsIFEDiKqKymg+#ry51y*XIT|H_t6;qhloAB25Z z$tP+H2X#^c1Sq|p?-C?{;t_T$QpL8d8A1Yg=58{QKp5tH)+}QWK%kR4dxTRD-10(N 
zL2q&byWAwu&N>o(+QOxVj&F;{^$ZX>Ch1cV>MComg_AKzaigsl;!3`>{rpjkFd-wa zn`^)=z~amB<>VkAa{La28Mth>5;j5)orBf{H{HwdAEr`)CtsZ=$1{M{tf!$L zKbT#*4eSP#L#c{z)+;BTuIK5H_DcZr%i?#udjZw zs41+{V(Cgl{Pjn(-={9Bd&K1Pj@Bjj=`a8}xHM^=?W;xPmK%Q!1N&sOj>9Gmt&9sb zFqq&ups{4PONHUe^jiemwM8dtZ<-9CX>vlEXSBPiW-9qw#Z;lCP@-t7o?Sv^75wTe zSU$y_jt`0@D*5H>Q|p1f$sC9p*5x7kU)jWD;nwz2i+e5?Vs3sfLj_W4YRn4 zg1>9EKGKx9rHGTqgSLL*f4yx*;{%k4vwn7j`a4-@v1_Z9w+;58ga=tHGUB(*v*RVC zIdAF9Oru^Aq0$|}zmm)@$(_AEG*}84i6Py1k#eCTf|)B|a`QiVAEDn(Rj*%ZXp!0t z(aMjZwrx2VhX6`yl2+Lxt!n|#7G2_bu-J1h1xgA`SQom9bit8_dHi~`KH!OJwgr80 zfj00{uojd}HBaoL&E)nr35U+*&LKad=6N0}zQh~=!bgj$I6~zSvG`z|(i&JyuD7wY zR&CmyuQo|2c&vM+a-yaqHYN|xm%Tcp1?fI42T$<3B?uI+#6na2J5}yul zz=k5Fur5Cp)?VK!935RCyTQ9fP}12nr{HgEu)dI=TJmxeW5A>Tb_sYuQv*tp|5#?Z zB_Xvr#*Dn_gkAqn2)bPIvvs4heQgN=;o$67 zmGuCKn$+=&&D!as^YEK+8ja+hZunE*y!=3M#2MP~-4~ta$_yecx8G`B@p;6jZ|U#? zC!Yp-@>X;XZ8C*BKQ$R;B=#BAh&M*|aWLG4dffEZL3&PNN%rke+>?md6;OhJ z1^5L+2;k|a9gPR(_$us=htC^gG5I&=>3w9@m? zl9yKEXTnYlvlC!WdS!ph?G1`^_v#6MnWp>m9<62Qm3}MkH!g=coVs5_j&0Y?&0wU6 zh`T%-HRLIb|DnG)FEwGS+;*ypnIBZG{}5-C(y0I57P^z8`=bn0ltEa4oSndB+! z!I)Rx9@qVXHaQA4UG8QiUEY@%{kW!-~qVa2UoWj!T5? 
z3GL5~OJ`9+s_^6p3`gma}3`GcbRML}OtLRu7awe1XR?^Tg?gplXq zSR8K)Y$+$W?*71O^)ofOS0)!Fa_AGZ zX9o28WGd)5mykV@XSc_0+LPSbl5_5+6($6Zj2ervbPm6FlD{DrdHJuu9duS%D(vL(IxWGTI$BT(S zbNUN8tMj*B5shT=xbu}P=)W!``xLG+fg9jQuFwo{@}JLS6QmMq9#6{did{xhM1Dme z#!G9}U+TOEN5rG7*~mRj_sq$B>oy%>8>e79f;uS<#fQeN!Ryj9E+|#UsOqh^T8$As z>{9}RE?4wI2N;gsB%nDGcvw^{+lKD?Y_1Q^8_dGOG^#UJ*!)8Lx0ZW#Lq(=p`vNhO zhcST#PeMSJe^oSW5UcSV0~n@!x3my;-2w7?Y|FJU-y9HJm<&futCUUJ=N~RqgrqTs z)}opWU0Dm4LC8*qw5IgYBfuFXrI%a{JK<1lGlNnp#iNs7sbi9fnu-w=4T!-&eo0uM z!Syxn5QaHjNK_WP1`r-xqLtfFrpt8!fLtF2HcsvMQ-LC|Aj%JWG zRZq4q7jVI|8t@!&sBWVUl@&Ji4Kby0`LlnvvpD<5slp=i&JnpL&=X7212Bm=Jy>b8 zlK3k7)PRWFZ~~0D3Rlbs0XZC&hbA{Yk9CR(LTu8vVDwDo98I3-?+ zf(4=iZpAtb$I`;V$4RwB*^fNhl7)mjV%8j#&=wtMNyD(UID-vJf=dqw;ggTEiU4b> z5Z$}Rc=6Qm$|E=3PjY%=l9s{n_9}}>wV0y@89EFU+EM+zNW?Ii&=ev0eK|5IJe?uGJc(fj30Ynn4L+)Dm7Nbe*Q^oo^8_ zlOT6V4pWB%h&}^yN}>hq0@{0BH(ShGJ@ghLWX})#2sC9211IN!yNV6choMmMEppHj zG=?5=YFm28v#`OId={vzW%;to%Ns(ra>$}9rF!Y(*nn2EJ8DmJ8i?O0bO7GjCa8Ss zTqOrCQc;=Gl72OknGLoZyS(F)t;kNT3n6!~avt0{@$2Took?n02 zr(rUn^|r1foLGl>$Mke>5{(--1cxEbexS&Wf)1g^VN`$@5oF^=ZE}H}fxA>5GzY!6 ziI}xJ$SMu^wPftTx{PtShL{LnJgYVe;K@rB&9gr<6;lmo`}T;Liaqg#c{#>gu#AR4 z!k(I-armhohzO#n4;Xmc{Qz9Cdwgp3I7P!k_TJmh#Q@m(qcOgf#(t!EkfyGvhGoz~ zz>#_ynuDKBz00xLA-B_s&8z4UrN9zR-nJ8f?0=Fh=b%M?{ZZt$t~$6Xl8UsjA&Y@%;|u4&i098U9SX;k7>@TKEZ^?NQ=ur@G%iR}%QpDLbI+Vl zR#GF^8Y~fExTBGNI#Tz>q+3*<5`2GNzwpXxThcyrn_8Bo)wWBtDFE4b%~Q>~j*>`A ziiFT>{V{S|?7swNHnEuKoNN*sh-J*BDZ4-vgv`B%*$A8GSvl)Jx0wpGF)7iP?7{~|+Y8K7S7aC(jHM39j&k7|_ z6bT}nb8^(`+R(HOy|wBE;;GDC_4mhTDuG2ldlG=KiqH@A*1EJJozaa<{i9Z(hO2VH z6jNT7G@u2>9+QQPpEbk))ft01j%*@`bM*DzDLhRUSg)&xPU@>(^&LKaK;(W#;0l-j z)jc#b5G$8-43+DNIv08X)}k`Xy3l$ zwC&~<%Jqw@$b>_>Wb5FgD@n@p3GXX@W>LE~!zBC`KVm)#nvLjb)@R(V{aA2&|wUnB=zYmbBWr%{Y~yvEtm zIZQF3ld>RQ3UE{o(!6zj)VL(@X$$2b$w>meP&5F6q*{sAaO%zrC-*`BJGo}WPE<~~ z_~_Z#c|F_3lt)x4tbR4c39n7=Ei>VyC?rOq=WMOZ(Oe6fS!1DM^)|G>hx|gGJE_Qn zbx(L>epzV!!_bck_s>b|=c{nj!O;dh1u%$wtW87s{~fmzddCJsC&))|ArD0Gz=t}( 
zZeHS@Ww^-50rQ;NF82BMBuQgKdzeQS@5N>b7A-R&0TK)Zr+bvFQjP=!w9qvzPRIRf z)e}HmW5kdF+a$GN2xGpDuV*O>aZk&#hSkFv<}s!y{~>#oGnF#h{^YdZ+(sRjd7#=J z5RetrK?TtQB`H3wZj=+0$nI6+ln>b`JN);?w&$>dF7JlK9?B3=eP2aY`m38qCPTWZ zHjMS+T>`?*8Z&y(=n+Eu#{ml2x2ria$XW>8RDd2G$E8DU1@OmDn#JoEgfYAVU|E0~ zkm;z36J~_DhZ>D$*}p0P4xk}|lS8SIhFh=EA%u!o2|F7=4p%WEM8l%+N!^)UU-cHs zV`EFvpUuO~FgoGN-JyDBqMnq=zQH1d?fuT3EnZJmw;gbhpK1uFPxogzY>dyM1fI8s z(Bmh=m);K_qN2NTMb9Ka(tNt=B54~zliI{~DurA_jwM+9u8P_0w=sgaw9 zh{fE-;C|KG&^~4ZdVE;)=ui~r2|!VizEQF_sYJBZ{dg7+hVZHqLIc+l9067`Z8xu> z(ITrMe#mV*5ZtW8!TCXC1%hg*IGo965?Q9dNSuVSfpLAoBugJ7o=cHM6TcO}Dlc$$ zlqp_iFX@CAu86&D>R>8{(j}4C2NVEsmp8(t4pVF_7< z-;lg-z>kgrijseD)AqX#bReyZ1P@q~0xomUi+eyG5~y;PEIEKU%QQzQY6Cs!LXYfj0_Oa z0x4VDm&=IR+QA=d&32Fl?W~L2>YLFplF=FCx=CHCR|oMZ&o?dWarfJ=Vj9 zJtKE2ztX%EFyht&`g{z7tEW6@mO7h>H4KGf>5(FpNeGf8ow-|~v&335EWa8XT3Z%7E`?O@_<;vp(`XOFVAKRt$7!w16;$kNFP7{$ zxl!2>!hEM$&8IS%@j^vl3J=kGbP5P7-Mch3#PJMPpO0J)t|DZ(tg1m7LqL3F7Qdmk zuvFS%H~Mgm*vuTHRkoFag##83weGNd{SXx~w!4->e`e_?s?$k?(>zmY9+iO_v6=}z#4A+FAKMSM|DB=0pbO}hn(EV%Y~=Ta5-ApAS%TVw*Dwz8-P0#|fe|Ex>iIY~zq0aJLN~u?CNu889%x~ld#GJ9q z&!uo0I(5(Iq010AfRag2+g+P?xK~0#Ha>c=bLUvE7HudhgVF6C;mJBudwqPt=vFQu zU-guAmexFU#9KuG3SLL5cG4wd1H54e3>3c{N&nq^TaDm-LbhWz)(JCY>);LL5eLvY zJ&DYU&~~?VIBiJ*qi)SnC#~qO*4IN8y={njCz`a}u!>bYzBXa!+zULThBz4vxP5ifLTJ>rRamI@pd_CSh%&2VImG?FOqIayHm zD7g7#Om68HnW3=N{FZ3oZ@h)peW=KW?qecAoU`t0teJJBtz*KBMjnv(YJKAL;CD#& zcR?HS-=ka6L2oE3gOjNIBCbp(kEeQH;chA?qd)gwwe@4(zPQpo)EOfWzc9uUlh427 zl_x{?$N0qgYAbRR^Dhm$=$c2??XLF<{YjcYJ!mwn??Q_2)&UNfBItV}&_rJb;h#3}8V@K9 z5HLt9wLrMi?Fh!)vF#N)TH?zakvtr~5AFc#*8Gx=4*Xn%fU1rI>S8t4rVep?b-e-r zy{nrRSu{KelOagNV8BN(FHeCysD3)#hh0`HO|%ln4{};c*55Fn2R038`0UNtx_qNmsFeMl|u9YI6O;mtk=3S@qSX&>hzSQPw&`*N4R z$JulQwhnE)fe@xxA= z$!!yhKbp@O*>0tW{Qsp==y&ED^V_66vp+ZW^dQl@A+R5n>G6G$P1rO!GSrYbV%Jwk ztQF&GrqU}rU>aZBzqFUtwv-D@iMHr07VQaXiGaXCx34KmPRpZWMt5!%4`h!Zm*ou< z!I--u4S>gT{=i$O6x=JL7G;v4m=%5*Nd60UOJsy?Gz9OAt+IAEK&ayIg^AnV!-T+3 z@I+dFPdq3Tl!wXatvYQZ))b&&mJ+`={l-J%y@4j43Fe5C#9`(pVr!tc#>S)x0&J2z 
zLy9+#^5dKVGe{{EKkkX9K^YMCe$AIn5u>kzu6I4aAwrA@0zt?0r5KtQa_+PJqXa^i zQ|q1v>r^J8x=PKtKK}hkNQEHEc1O#?w&x@Gl7tYT4J-xG=4vLu-Bt?T7vG6Mc&=qM zFl7BG1%vGo27H4YJ;wd2G4D7&nj#+~tI{APEVBY+z>z?(*!J(K2;h|Xs#r2qviG`I zubCyvUrnEF6mn%$PyGjTYrI?7Gx2d-aC@=4eU8;t%-EnwArSqOGFuS527iPH(4ZiM zIb)k30G!7W&Dec9x@0ks*a7)~e;*veo+VUW_hLZ?J5G zS$0mrvkJuH@)=06B2tW{jRO{5KuatH6K^mqcq2UVDgQxm*{_@m1Uu)VA2FB;yb5Cn z2^?EO1(ja!#(B&C;VIA!lc?zuU_5!9F|g&Z)GVH0RxwO-yRghNdIqSVKyUv+GDv%H> z5-zzMTO3EzPArhaFy0B+|&<8zh~ zgZV~uWzrgVD`|y%`S4(`QKMde9C z97OQ{D0i_q%pH3XzPtk+Q#U!Vd}DTfG(*^Mv1UcSDR?l!DsBg0sori8#aV&K$Ee!I zWz~?$?73q#&PUFF!O!N(b~2PNhDWJX7VJ@S4}Yq*+({z>z{pSl;Qjyl+=<3ZB0W781G8RMFxO@uZK2e1&r@&KA8rrTr!G~H}JL0AGjS6@o4;= zORzaPAfJc=@x1CsIO0D5O&}$jp$t?5#V4)AL8kzM;NusBKdikqYCyXz!mWuXiCbA(@WWAnjy5zv&D4w%b5j<#)c}q@0OOAlL(S*L#I?-viy2?AwUh@3=Ad*bWK*B=cw&Lr?jtuRlFIl62!@D>S)4%hgghq zZ{W6y;8MW{D^yF}YF(!{U3B>nG+9<<9sazjolk_J4)UNa6#FowS-hUgAIJ4!5mKt4 zquJA%smKTfEGFBH0n8nvV<9#j*;i~M6KjWN%>x+Q+v-jS#f5L8n?Ji3ZgxM z`#$94DV18Lt>qm#1PDUo!e7>!-(s9FR;~KL^oHTSMT3sIT|gN>c>>7SuRiz z6FYH)Ifs*}G0VjhJWM20*TSg+2jHeH(-@XFBB=UBh)lHkbPGhRBF5o>heCWwh=nmW z-%v%bj_J>J7t3l%5dAw;_w-<`g=3TBJSDG8>4b=uoq-nivf0JX+S){5QsNb~&d}3w zeY{llWvk|Yx~Eg#;6trDx}?}@e|lhg*Fx9L*Y&NLyIyOZJL3ow-)?B4=Ip~uzvvZ5 z6fkF^KzpF4cOm+qpP}VwQO=oIl8V@M8X$?UQLG|RhHyE{ZVMLU zA3vkeIpTg)K+vr=hr2%oj;m7;<;%Etw_GNXO<`#TRJ?F{cBXS#Vp%`W^fKpB=O_%8 zx(F#|oz_h)>meNN6Bh4@kF>-beWJu-i9jc$?;~TIe6f}jAu23)ITqWrvdsdj<}i$76w5$9uQMTB zgzZ7%y8T20hmvf8IrSg#V=*CpjU%)Gxn>F(UI32azyk--OA*pch!bn1;W_*RT;C@~ znw}4f^DaH2dKSflf|BbQLevFQYK>xOXNTczhVJ^Qj)X?Zhg1^0(X5FP}JuKLsJyc`3;XvDF$qHz4|X;neJUuE|SCgo6$?vRYDcEQ8wy?fnv zlvypFe(Dk3sKVV#q@_9p@gw|goaJA`~_w2eo&DQVx;FuJ+ zhxyCTQ%F%^d_YN1+~4)(M93<~a&2WfuMH5g+~75oJCqIu&+Wm3NN^YV}Pb_J(;+G_^V)DWw|&W-%2;g^CqEbeME%45N%B>n&C!%8+au!hwdi4fy3o&Wx4 z?C@Xgq!MCtl!X&@kgkO}+8HhuH!}Rm{~0T?^a1b@MON2oXJNL&sI-@^+O9o$iI2U# zsU`{%V5Eb=J|i+D&`9J?R1dXn;+dzU0st0UP>~lx2vj4l*cwJO=ewv+tqvLdE9jP^ z5;n~z;plHH0%8(~13;ud5cdVZOPE`Fm8nqwG?%p48`4IOvvC(eTUmj`^%{%xbzSA3 
zTC`ju?=OIGyBWqq4w6bR4x?ZmuBkVsvXXvQ?gnb&P@wAHF&>g$EbHf- z$u+fS=(GEN+4w~zbKnR+m}Nrl%?<_%iDgOM`Fw-jwV^b8^*SQCgm=%)$BFz}8LGBRJ__~*Ys8OfeOP+pS8B$i#D)%NuIUI>WB|K{c-Z7B9 zQ6_vu5;$sCEkj<^!C0DKmL8D}8O#q>wB&kFQT=OlyHu-r>e1JHhq$>6Ez^b_Dx3#@ zOjEI5C{G%#S~;%n@yjzq6v;XH`5RAVUGfIZwt34LN=#{Vi;K97)HoFjHsd%OIEsT} z3^{8m9Ma6y^-dPMw6cgg%FTa6zC-(nM)3UtNFlj4GYP_6IuizPo&lUzoA3jygvmoe zwWB)mqC}mlJ&l}r0#q~T&Yn1226c|(x8sMkUSBEVoT=Hu0R~V9AUFpE=w1_)tpjxX z?@s|c^Ds>#xbJf!g;|}f0TM1J4n_qZ|H+W0vYk|Wgw43(RLKP~s2fyT$^w9}Ps6AVW^0vuf%YqERWw zcS(cdzjt<_{$*GnED&(}pYF4j%rDS&g=sf9RZT3A!-qKagX$4=Gwnt37^|g9W?IH$_$o_cE8pjx76DBG9*zrjSTlk@*FfcwE zG8-woxXE3`t_dZ4C|Q@qTk3Hfd2$N~?fxO1G2l}C+oQ+3do=UeP4rv|un)9+br-h@ zbAf(QcXWXY04y}VRh44V>1*W};E2gf#V{ZLu3;{m(Mm+qo<{QtoIYA+V{E&TJr7F3 z;S}4qvVbyJ8Z%e-HO)7N-g_;_)WeQZNp5$$Z zkR(=x5)R_=#xDaQT0~zjxRO0E+{rs+BlJ7aR#92;k4wipaaKf^mdJ<%qmP&3rJ%{E}H*`AJTJ1MUs8(ZJghYwK2EA*)}jLD-jV#$ya- zwWDl4ZBU_R1G{|+MVjBeeOno_3r3mlPryNR-#a%*aghxh4mCY9w{d~7B;7XOnpcWv zq(<62Cu%O#*ds?I!_ELW4tq;dd8da2aE)FddkRolGOZ6#I=ZE>S9sm^OT$26q48QH zAC`l;ZArnD0WN9($BB}^x3%Rm-Zgk*@?M4_n;;JOF~SzjLlIrt zYQ*f=xplb*^PX%m^k{Wku{Cqf@Y8{gD+b1Y2@|aQveEmoB-AVWMf7QW*LPR~R`0>4 zBCm}Zesa_R-~So@+K*tF`8{g{V(W+<7}WGzCDAT}22M+IV%dn1=N=C(zX)_>Ktqof zuY<2%1mat$VUqX-RTY~@R}Ah!m*REe$gUv(i)!Ys`Fn{s)y2}J)PFvO|e8e##xB8WCdzlK5mo`4#e zU8qPPaI}509v(hEmhNC~@lf}y>0aPHI;w3z>DJO>tPK$krLRsAi}Ax@H#qZgvo?n5 zvDpB)B|1YoKJA0)U0A)a_)xplwRvlglPHqh5oZnkC+AT`7Q+zPu1KlHmtKc4nK0_qM)4pAptAbj~gjv0qaDcw_#4Hne&M3$|;sFJo%L-5Vv~CCF|G-umYfW2cR`r z#lJtEDPhovxdUdH0|{uquP3djzY>2HSS>obQsaBMP4i?10;s6%B(oV&pzZbWKc>I9`^8!Lf|}HcmdKEimtDx)qGkR=Nmd94k}4u_?gEMt^iv)Q z+Ygx+N&>#e+v4-tqYp31XjsSQziA9_wXn4I9l@gjYwn%VL4CmiTB?#y44|bBM9D5u z?fc?VW6cB%1qTJUFvhLM-XwT76P8Wn;yT*ql8l{W^H6MO*o3uB@W_B5q*64pmvuxm zSrm!r$kHgIz8GXumW($eMH`zn#*dBAV|LOQl`}RXj4>)(WMvf+C06Fc)zcp$i5;2D zN)fZy6~T&qcwsfIHS3o-I!s7NeooYi5@yDR{dg4_ny104z4!)QBO=u-D79nM;%x1_ zt9WE;*2NirS2ibN&O{M0uGlL%&nir_>RDzMMw=0Ftm-GnFrQeXp;g)McI8W|bkzu{ zsz-^cIPdk-UjC~}zlijr;Jh9_`zqea9m{o@AtKr(CAp$F;wXkod}S3hvLF^!oO_z; 
zCPjHTB{7BS%AlIFRPvxzji36_6Gob%;!F``O3E0i<4KXo$kn`*87*}|B8%WiQOb%T zWmc-{Xi+UzRG9a@baadq#fh>hQNI)Guu(m)C8mYth;<_4TP-aYL2A_yrB!3J6+GH2 zh;>o^%7nhpN%tDg-9oXcG496RA3T#VTE(4ZFVpvz)o0j!tX+8ZFs*K9mCw6Qq^feH zOlQOqYeopHjA(uMsM@j8XxA=_ybG)(+uq9Yqibp1*k;x=9(N7{%n2<#n6n2)(8xL)K z#2R<_agQF)kR~EVgfb?^jM*s?o>+z^uHnUITHKQ;OdRo<#aqO(VFo>3Le=!9j|(l#cU3`b6}mYK=$s!nsnu&SgtCqftb)9X=+Dj4-4 zw&=t=nP?{>o=zvu$&?cr<0QtO%rjllOg78Jo658=NG2foxZ)@kI>SW8nxc47H8i4N z5l!$zK@{ORDxwh4S$IPfv?&S}-9mPdh8#X(46#GT6FQ@@!feC8LJOIZgkBsWAfO2S z7(%R&k9Q`b1=Xw|IzA}mWoM+I3QoKz(K$whkG)cjHWI^i5$C^g2YI<`JQV4b;&q6_|XRS{A|80SMq`Is5M8ul~B^VBqlS$4~NF0{u+YSkQ1 z6~j~3Bjr(jG^!X@To6xVLDPhhXv`cMA_mPAOLN{#$%D+AiQ&f|dZw<5IcJ6_tQL%E z%}$B&WLh+tDNBZor#~b`qO#z~lqfPch77fJ6K4@0R?G+=^P*$qNHH#>!~j9v$c!)& z692r8uxbn6pXd&$Q+M*4DK^e{Cr3Lm*2xiX_d+_cp~nz~h>me$?1_m15h#rGM~FBM zT|=DGbEs6@i6xZHM`1+54e?Iy2+igaMyxENj9N!TTQB?#)CeLq7QKs&3?0M5H>`z2 z)nVd7g+oMX6%_slw^$VsZWeCgD;^~7q6tQ_OncfEL1)IV{!d=3rH!)>A9Oxe8ioM-oHAk?hx$uq( zVi(GWc8G&)!EtMw&?<^F)JZT4!_nZHU}x-QDB56W!BPqfE(s5_q(a3vKTKS(Dw?8U zB0{v`br}vtgz^Q7hbRSlJ2as$OVQH!CMwh)?20x-{jnx4&^bt4jh?72)PiJ>VEIf$ zs3i=wSB{AY^@d*9F}ns&#zJH2VQzu0EQ$Mq(eM%vRTAbyi)#|$FG=FU$s-k{9;}Y? z2D*eqLkxv^RD!}s!bOD=nTdl>LPTp)k8nA-e*=|-6TcqJ8X`z6jFc`=hjA77#5&aVB$z6^H3DK_yU6ZBAh2kPdN!REQ#Q|P+<(AaC6~vl#2UZpg!Eg%LU)VgLrQPQBg~)g=z$c zuN=v`hS~(X+!Z1& zoca79A>nQ$SGXI($EmrX&$nV22tudARD{M^haafosBQ7=LPd*$odlEf6v66HHy0AZ zEYy`m4W5Rwg)8BZ$jm9yhOl5E2DKqdf&~T7NVyHZgrNvk=1m)>D9}l$DQzgoJzN>9 zE=((Yil;VAB@{`pO-~zY9O@TtCs7;9ERaNqo_~a(hY(Yy4J8RT7JOk$8%{0+wGE91 z8w=9nNrMQ5lL%Kw6FWH;J-ics!;6F~hg#9{P?m5jN!lRP5Jf>QIMfC)F)|7{_!X*Q zOu^)ECX_IxYeNMEJB7HTMH^%Jm^Wr0S*!#EMDD$I?JHq=p|MkqKi8*U^}DVT_j2Ac~-7UYE0hKGZt);1#45DN(Uw&7MG=D~Ho4P*~P z7fOO{uq^~tpgO4yt_l+mW90xrs7R1Hkc-oX8Hb|@ei=Y8r#5^VA|{x^w#oK6@$N-4 zB4gc6@Hb-gh?z)pcAUEo15XmvHK%zT{$ zO*JQ}>)eNk_G^v*o#|hYe&_spl;08KcV*8PMDty-?z7N77s&?%9^Xyz9m98FeMO@? 
zV$o6XbXPQ;O`_6p=<3Iy8)4~!H!hl4ul}B&JE5=cNOO0bxj4$)eH)q@P0pssTwdgK zwA`Ihl6%@n&<{jAS;(i3Y`Nh+)Q>1+RCa zl>vd3YeX~)}y@NomMvz{x(Jj(Mh+Oa}%Qk#pZbywUznMQea)pqNew?HBWDC6m(aq5u&&Yz5W>Ygk zXvCro*XUv!$(fA8l&{3dd<>(u?9eeA(_kcFdR~!vf=q-tv&tg#qKAhBv!QwzvMCC4 z1*Lv8VWgpSLcCK;K_OZi8C|hX)d@$%pE@Vf=@-Iz!Rn z@_&p$RH6tX7mmyuqO;)75S2AG_sCNVQ$a;@G($I=8AEL?^nx9#5QT}`vxTF%U&uiV zIY@C5Y&b$RrU*G)xhUd7%|ctOkX<;NQ1~T`87-)yf_#!vs45DoP@FIc0)h!MBgjgK z4RWyQDimrI=8Pt`GqFk+;!9z4sa(MxQkWH|6hVZ@*GesviFXrnVOrXak?qW%`_nh= zgLKz$*WB0w*->uDRK`sadmAhHSDacp`C^-yf) zX4qJZwV9CO)mypMkRNQ0RU;L?hDc_r4ei5-NmU|Km71~%GoG_W6(k~Dly^6kX`hc& z1I6)gjs5@unTkI0;e5h~^07L`hhFUGf=q=w;(1!0P%WB=g3H3EEZ2k!3L?jQCde<) z4egOIWr5-(M@X;~#}A$gViO`F)W{{!jkWFb-i|m}Mg3Sb4V?%?77sBKCDHsOI5ZX> z7wkr%si>ftaB68 zjG4m6Cl7KmnF&=YX(*G6$--S}8HZMd6Cug8aWZlTd4m}dF~p9u=b+#*BU((55G(5N zipR*Aj(Lw_E*_%HxeH_yY9^Q?hVIyy){X+%@H%I;2{f$Eg{y?4^L=aAW5MS@(>i8e z305JU3lbEahna_tT{sGN2m5J9*0WApp$Gf%CuI66cOTNpH1vvcuY?m z=z?d`a28=2G;4zh%Ys_G%}J=MvrrVqF$>{v_3#>oMHy?BP?umktwcins&0WAd`WAQ zP%Y zka5CXKgu1>8g6Uw&0ZpuOn4Q~5~1$!GMal);Z!V3^+M3_cDQIDpRP#_sp1M2`Dx>LA0lw3|K5rP~}!KfLN zAZ4~P=HuPOOSFqS++Tgv{6#4q&O%haxT~|@m5RRWNs-!n1(I{_DonXJi94gRG0(2- z>^$-_+AfOhKDXFWF^uRD#Ln8)k$J>#?^t!OR|Q>HL5@^&;Z{@EDyb8isA%0E_Xs`( zduhL-`o-DrdD&2jo$lXHLWsq0YK;9+*!d#Jx0UZkXdi{EAzwXq>x^RggIue=wozds zd}wrM(Z!`a9la)#I?>zo9t zle5~y9HJ#=3Uw`o#g2!Jpm5Sto*XHX^-= zvYy#*!!nz179sTA1~Us8v&Ypg%t0pHU0H?EstS&HI%Z)MA?VV zmXTaIE1OQ;9(*~R>|5B-ZOWN>3FS5=jN4skuebirM!`d7v8|?Plg+j;He!=cH@rG zMes-jJJH03f-Y8~#YpUoI3hE46tSQqh8VSW(LmO*hcz1JISRYZC)kt{W?vZjYBWcy znUz(e;!7v3g$mWKQ#MM?LYPEtj;)h3S%O5-u0%wHb#?RZ0R9~d4 zs;RP=R8_gPt0IwL_t%4z*lVa>pI4O7OK6JovQQ=u8)G;;tS>1ErwLM_xoiu|1xxj_ z8k-6_YiP<2dW^;)=UiHg$~?mL;@Y|C7`^6rQl{~3th!EJxQ^SXk=_+YDl8&3$B=0I zO8JTxHT6)gmaw8O!AHDjW2s3|XGIng5oIzpvbJVom80M@R!O2|8M?M`Nv#VN2cdFW ztyzR>P+BJxe#dCprL9+$cbB2vtuD+R>wb@LH7J;wB+>AqFpp0uDEE7i_<~%V-9k*6 z)n&KKo7hfqB>UqWJ1U{rO`urwuB=L`sUOKO>NdgF z<-FhiAv){w@_*+)YLGLla+LDlhGDLky#Dz1+Zx)R8Zj3giZME`E^|=;h9me 
zNIVh7&eO{#IikB^P7ezPz3!r7qAj|c3l_Jz&1ycs%X70AY2N6Cv^N#brA+&>nD?eY zXFU1D(XQkdehOpr@{pJyqX)}LA}%71kX+;rhI}4sB9HL;#lwxVQ@DFPgpRS6G&b>D z1sRJ=g|K1cJxQPst1roW>rYCploK*xPWqWJqc?et+&V#scvnLgVVz%Q^tm-h+e;X+ zV75>_<@|X#Sy`AP_U`PrgsLfXt8DJ|HdFYK*}^!+OhUo>U{Z>?p^nfHhB<5#R&J{n z$`fuT*a@pVxWAOso>cb#5CK+w;eOIU78_VCQL-A&#|K#Y^p1EXC7U9WMq2zbh0Ug0gh#;6lw~m z#sWHJ36)hh5DHTe000;a3IzqkQi8QIA%=bw00eTg>muZkp+9;_03G}M=Ega+j)qU( zicON@oR{Z_YRF4$n2uHiK&K?R*T(P@zwt66)FT#df};=8%x@RVv1!BGH!f9~?ys9U z$w^;gkp^*{+M~O8$y)vk0FRcf8u_Dt4sr+*1G`b^sP`p~1YO$fYma$oyh@35@o)4X z7|^}i@t%X7)&eL5UW)irSX0>UtgMr|a=hLTt&mX(6msu5{xo^YMFhxGC{j=8>I>!bTE*WWSXx zbJmYXigz%tQm9UW(sB{BA?R`Rj2&5LxYz1Nca3*+ILOtWsupDLHmGm2>}8G)YNVJI zV=2E+M|MPUaBggHpX?D4BDsawv#?uLf>yg=O+$(ogZkZ|D z5dwQI0i0*I9rarzEsJ6XGyICsZgMOgBvE#uueTy8NjmBb@-TMS{!^Zun%oVhm z^s?=v7Lx(__wXd!k%-9hu0W%>?BeuZ>Fepw42-|sE5PK$ZsQH*X3gS$@$qPat2h2D zA}Ja`cbXC<2)?BRIU%)W!BeNQBS*>`PJniby@D}uzaMi=RjAH^PXSdHvCxvj=)b2n z^yJX$-YZ_$p>1xuS#7XTAto-5PbE3Fw1Mcw{gXM$hbPm1dq3Qo9YbX)jziaar|N4W zz@EcFZ|TSlkVL4d_-jmkU`A?iiIn_AM04UY4WSyabPleP4z`u$KOdb*(xZNC*L`B0 zHAW)!{}+zhu@^FaWxN@v&9XS}#b4m&BzFc}{bn+IfyZ>K*fys~Wu<%Wky^Rn<$6m< z3!V$-dJbo^3D8uwa2e7-T> z{%6!tq}9Q$AAmN$=wj#o&o&SJ{!|?Gyt`)+dlouplqjPrelbVTHR8btMyuU4R-@ z2vHNAX)^;n!A=3KvpBkqz#Xx2LR@I;IEDtoqKSN%&UWwg-KWJCJc(n#L`ByWAL&}> zYdJ;4TVe03J`?;{Hq;CYYK2yX;zvWfOZM8kZHb|uY;hUkxPt3x42qOEjBX8wTXbCu!SLMMN$fzqzaykUIgyl_(~Rp0C@et7|-tx>Xr6UvbjZR zs`BvysWEQ^hku>{IV^`ZJ764_ephoWOm}01EpDnkGp|QydEN?4Hw-(qQQeyq4BFHp zF0bUKK#msHnMP4w8Mxa+x_8)I1vku$xDeyeG?Bn;>xb}U7_X$s`&YO0%M%OZicHf+ zXTCsNFt71d(s@QL;^dv(-|$+V%tE3RWs^P1u|cViKw$#6t)#4}F;}|sE;{+6+-qj5 z#X_;OY9M?u2$k=M#pB#hLo~X{|21V;3uWckf02irfiuRzXn?pj4H3pZ*3u$=RZ>SP zo0yZ4C*zUbyFX$s`z(&e`9l|dG!zT#qfVjVn{tqeQ#bAWlr};DYqbC!%bX>Ttej~N zoZb{=rJ;K>ai0;NDMzjC4MA2c-V%WVF^hcO#W=wa(s}H%4R!Pg=bOBJ^=&X}JwsZjI4#b1^ z;lp0~C2xk(s{xLezqP@f-y#uDECoe z8!?QU?1dZER?Hvj3*qa>v}=1CRP-1@Mv*;~h(+~Dz?`EvtQO1N7nyJV@rN8K4Ix<&kOgap=pD zN2YMq^id=^m*~3ZTflrSmJWF-D3nT3v?|FT>7fiWT4vI-z;zIfHcXxaQ^9?U1^cYE;)Yk 
zdsVpTjDEIo=4}SlK;_16qm^W-JY_miE5f99ikyMQEa)g=hE{u9{N>B4fK<0IJLK}l zB$X(%Is-{kC@vnR2tbM2U9bcY2<6~?s@QPM?D{!KF?IvfjT!vUAWy=s&|+jQ$zeFY#rAwdTiQu6r8dJv@} zw@5D9Idt)+)Gu4t7|!?`eCv`V6nU>FYt9j(&d{zeZH&24BajPas-WiU0^SZgKdzB2 z55xPF`Wux0!JG5aL0xJzY$hwY__&(lDmGP&v8){ZpCZD?BI`pe+|w3ez74IFEO2WZ%7`xvk~065R)#9F|A5d%(ggJ~ zD~Cr0RG%6rZKR0^lwW&|?}9`UOjIv0oima6*$ig4k8(kL+tW;27KE;4t0)Q zF;7-}N=U<3`Uu&72vEpzib(NrgUY=W*2W>=N-FHi z&H|Hz`!P=|Wv;zel#d>BAfB#KLFnU3q^Fk8$tMmBcpK3j%3P)Q5&VRBs3RmAe{?_KLF+f^ukKr91&rMY$J4~ zjT3pcNfWVyAgtaNK#C%b&^r#vhyi#NH?ZA`Q6o>Q!&Gh5`;n}An^FTRE}1^@HL!k% zgev%|dseg*I{ZjB@A|Q)@gCVANNX$dnluOcxwSF&&l=c|t`C3=K|3yU@icdl)|Ap|#00?H zLJ}BF0uW$k3!1uK3Eb8T8EwWt;=?yUt0BzsUIy zA-;ky*;Bztl#+#O=e%SX9mJTX3hgJ`v^KX3``wp+`64XL+)V9hCyfZxSF(US7*AY) zt@ik#y;{JJ>wG<~SmHK-Ivn9JPp4f;_GwbLi4~@-c7*1Q?4}9qd305|6Rii=__Yw| z?wmFbz?k=7>RV%`SwDDJjM%j*$+}d>a4vMJ$IL?wik{xa3G!k6d zqbzwUa_vGos-wL%a>x$zB}vhxg>Qib_!eMlj)nO`@lD<{fXP$L3?#`!0hca%yMTTC zX?5Sf2rtu;kP&G;7k`1hwFULAqFjMOj#}^t4`hw66H(_Pc!04Bkf|%>C2d2Nw7d(L zKN$b4Q}?6ltL$OPSlm>vtlXmqx9Jp+08&?oO}-rb|Gpg3yO+j%z(F{mCnnaCB&z_a zj&T$evAD_uK}W>%T3B-72X6Jn0U|lRYUXP!3IprY%?&Qfrt|SJOK0bP{t))hLi zqoS<-KgclFOODa-Y=_h?=QJATYAeLYn06slwbfnW(!SUQt|d`vW(p_avU~eI?7E=Dk%p35$=9CNR=)5lcSRTq&KI=;-IJJ9^GV3I$wRFc zr2N9PAf1A1yN&B(A`?zYuceh0v2?*Omw`Ns4XH=?W2Z#~bFs&-@02l36iw1G;QCN= zt#BTWko?H%%JLJF;&$NFJ6ckj5Qx5pO!_b>W(8}X8XK$6APrC6psAi(vm8LhHwv?<$en-jCSh8;@U7I`WKRNiN>bBb;1`gBp7nqc$sKUQ92tz8 zLHyYu_>6Qd?fq-uc}?{A=k?@1t8b!2BowG4JYX;z%KdFy92{c2l4Bp22QH5>W^zFX z9(UG8;~p1`N>$G$KpO%2kGq}H1HxSPfVw&mW{Ftwxqjp<=k)d*hq^!G7Gmg|jMyEv zVcrsw(B^2!GGfFcU01WYkA z=Dp(9%o7bdm5;~G#{=>D6>Zs9?auYhu|E)C5r%UAS7zBq=U`I!JqGyOureaMi+5W? 
z1300JsXj#wrG}|2P6{(Z+)pZ?%#E*)T^x*B#?CEMMszo3MFCm}4Zok4 zy9%UuoSS%estAG9MB%`2)bI`ZIad;PW|IzHkYDBwc?w_l zDilWivqMi-s|KAq(n_YDJkFqqf6Nc2()($=!1M6bokHa(8u=$gjALL#%okzGChprI zLwZ}IQ`+dqkqf*M-VnYfE=ix|uX4GDXPSVTJ(6tRJWRE%z*oXH68Z@KIl^)aRj$1*4l$0Ly z;?H9DL2u(@sfKOqD8g?iG|KnMB6XlKE)w$Pd$axiGSAGuOMOm0<;YM(Z)Hg>RF>Pl za;onSm&jl>kZz^xJxP8x*;YFPx|(rr&d)D#P|{B(DjjLLW@p|fZXtGP3Jq?f)QgO8 zO)j&QjD&b{4@GS8calc@%O8e<7tmXLmuy(GzLd)8KxkMJ(MA@~ra*SxDQ-K?lj$Cv z7cN@F5lBrqmJ(4UT|o?-2T`Q0Y1K15u<0gb^HaQj;GQa)xh%?F8GoWuRFeDam%i;E z8UuVNb8`y72nm`a;(SOUyw6SM3ljaL;C|zpZpkd8zVI*JFTA)AXP>>IhFEar*^R-; zq&8MYj1b_wj4cxFj@$pfogYu)LZRg3vvrmj&PYIfl3e>MisAxUzHTRKWx55e&1w#e zcCH9=%Si2OJ`n9<`2=GI5pEa8wF6#RSxelT}$1eaG0JG)p$9UAUCAQH(AvBo=CfuHNTkTfH{Zp$JVMYO~F*)|gv zdH;Lfga;nAp&zqXWbgBw2eu_(Y{Ufu)>b<{Zv&2!b{`<`Z6~Qu-jHaikr5OA(rc{R ziekeuQ0%ccR+!}r{XWlT>V4Y?UnL9N$-aC9nNgM)X>wpijoUDq)>0(3LKw}rgdx53 z{T$Dtk?cLO1o7JUsa1k*kl{4O$dGxONa*6U*~W!SPHVjPmkyrCCsXwB>L7I*j>McW zpmy$H&->0|>V`wZ=>r}5;;!g&0XTSc5D;Ma*A9YS$Pda(bg=Fkn*h@O)VP5`)_-)@ z;2b+evmV^^7!tkS^;CfAGqF(OaZX0AfN!{679;yK5DsHE6xJ8X&*?{qJybd-27PWR z)BMo1p4k!q5c4Rl*8ZeX7G?=rKX;+&V@dKt^8W=hJ*PUE%;CH3r#aj6KM%)wIQs56Ok zmJ);e>R+iBodj`_+|%AiD`of=&nOPrj;*ens8yB(w|`dWxUta;u6*oxO+F+ur!Ssl zdF3#&N8($G`3cfQIzzA@3Q!RuCY!Xk1&JTCqwXkEUk%!Xs;05ozIj1XbE_I~)}NVd zW(F(c1CA3ks>sw~MhqdPI6%JWZv^OQm;nyqr(V|00XgqXB*`U(|3^Y-boxV^6S7Qj zRfL8Dl5KH_2F|1GRSfkw5V!H>yAZigx*$b%%*<7kyvmBa>&1S2CMQkNLhd9rTcpu=Ou&qdMOOW@aTz-mm zfd$v&EP8-8^bQ5uIBSw>G7wHh%- z{u6*(mU3kexb~0gjCj$q5RlpaGn}5o_XH$)NyyU~hYBE|>IOFCag-=&EH09qzve(H zbLl+|P2{US7pOgl!4HoEAo#4PK@g*0umXnsb!M@Jf3@m^DQ@=5@F%MZ0IJ!PaX2O0 z8;zkfoQ?5H0f5UeQxgW3{nLNm14n%1pAa`ZMQ#4F2=^Ew_N9tq9#ZgL_$JstLmyhR z+cat7f<>u9{5lkKX?%*1?n6F;fYxQxR1yI-icljefd}VjiM)n7O8)+Vqzf_myaQ6g z>Ey6GmoN#R2+X6~Pm|>z6peaL0EWi$!sq~@EuGt7o!81C?@y1@)MQnGT;pzi&~dTd&lP7cF` zE}dhnhx0^4$FfM5!9VDc>@tlWx>5f&^y`U+c#fhIgN^tF9h&x6?N4@rj0-FpZM_Y( zqsgp~vb)HbvuOssg=9by2>SxC1IS|so~PnTFY#Htm_!!BJT{u)`Nf$Aiw-&M1#5K-ZrTY8r2xoWsZlw 
zSvwMTSa~Ezx$l>{@BjePl^!8NkbzE!E@GJ6>hHLRT$`Nx!7nd^-;PTXY-(8;iWo0bBezEFgr~zoxk1VHZ z0CYGz>|z=GV4b<9T`2p87bQJoy)JBcr8jArsjiIX(h*#oe7Iy_5RnE;Bn<1nvQu3F zsnlbZ`i_qbdnd`YE12=wHB*hV81Vpk?%_rH^$V-nMeggKg#MdQIK1ndXIzNg55d^1~QcMTK^r~vl8_`>S zzHA)5xk+9(U6-xf03r#<6GQ(C(tSP)#Fe>ls+qf6T|lR5IOsJ7iz8=R?uX7FpHEpA z$w0b^#-OU~@%98lEog+sg9eS`c0>4_|}EO1(sBaZ7Ay>`uHR(Jm55j#_1&OM}Sc zkXZBJSP#wMYaP!~i>s&)a(gkQi$49_@HbJ0!zGz~)dU$$#J;#dTCfY(Y9EqnPpgYT z=S?JnDsDqQ204YcoOE(O5!aeJFp8I->?U+#y)4{`pc5(T^zXIYr{E<{l`n^3Tl8 z;seAI2iB^M>TRTjI{o@efn+U_0%Gw|nA-XdcP=H0Dw0kWEv#@%N2zaP_FI*Ogf(0c z8H>GtqV3HP`LFyz3Qb(68I?BBF+u&yn$n!Jfsyn}?GC6w+$e#s37C*J6RvNtIwT4r zEw;>*wEK}c(Xc>5khXUv@~DjxAfDY|tc4evbwi-iy(_y=OJ(;!AE-g0`$Fe~c0hF2 z`J<~?tUzu#Zl%<=YE)v)s?|z^UVl68ExAQF8F6Bqg7SAP2Sf!eOGVR3M z72gCpsi@K9zj+2Zuu+PG7v7sz%+uJ zL$0McZ2v5SZ^xPlApuT6kCBhIXXp9Cgd|)VcrPs?uL>q%+I{%y2M)*fFN1|L7-zE2 zfSI*Imo&XiTK-AlfYG^&Jc+jgfFC1mY_xEk>nJgeNGbo*qe;VZ5hp>`cQpA@^8xro zrMwe+$HfMpS`y>1yN%5pY&HX0#93x1&J6nfFwIO`vmmvUo4xs_%QpT*6gos~ZdS^| zLBJDedOL0p!ZsDhN*W}H$~}$Bh0IYC^(kNEY6ZnBDK?e`a=hZ8=cEa==yyxFb}hp5 zMyx2C$nroG?l9&<$7xV21KZ@|?Xpj`(THgZ4se{Cp-bW+gd99w4FsT(u3$WO5#$ie z>nRvUproWlf2|7FdhLj%XH-v$(u~X)ONao@9stcZ;L#JZ9Crt{9Igo1|8?R|^y88P z;M*7>ATL=)z__!>F9F?3SM^{5*WE!TAAx7EvVsFQs{^vB1|fN5x9rcG)Ir!qRJMlX z;gZ!HCXi5vc20a1!hefWQPE8gUK8JBX?%6rI#bFBj=`$YKU4wHP zDPMs1H>aA&eIc%W_PjKB)8*tu%B%z2%BWYCU3~7xH$_E}jw6yl|8TNBV6c!*!AF@y z{2>h*9NeD2VuI18(%C0HnQ=CopXV&%^ziS;r_&l`7(R5LOlr=WMw*hs7>%1pf=%K$ zxJPAiW;96(h%=zsxggc#c=8e|D9ABUk)=x;NJ zvfPVNrsJcns0B;=_wVyuFUcjjd+#onR8XV*C)%ey`DBzzr(U zTStz60NQ^HI0s3NQ~!sp|Bnj(n3n&1d{Zlw@&2%%m0{jMU#UKZ5v)`E^ACSMjPdLa32R z4FV9CY~hwMQx4E&F@aV@(Y#x&V_`P#j!9oF=2CA+mK4O0#SJwvTUnANZ0VlhgYc3l zMVP2Kz)?VP4K`{i?4}##$Fk|9oSY7Z&SblDt99f{2hv#h zTPg6Ez4v&dl&2_p1+)0>ZlL4lgMHa+tVFP1%lDvyxd>586;X;HD}ppMJ}+jg`UY#* z3R3Do(Ch#nTSP+$I)AZ)XtM9u*EoQJSH)4dLJs9i*4|geVP9qW?-mMv=Ij}Cq_57D zLRM#dGBHH5ot`bxU~%SJ23jPCE)k-gF1z(oQ994wQ4!mEiO)qlmTmOzy_`>1pxjU{B2z!LsbDRQM zEfM@s@IhHE1|rTTq;ZbTheOkhnDydp{1qpqG<7@jwi+fw&Ll86rMs@2 
zd5Ns?C411p-1a4 zK;YoQycguUkuG6Aj!yx)q+*tEQ!v5S&S<~JTw8K!$kz96ZYao{q39i40~xTJnUG*; zBY%x8KrWLtnpE8e1Wv(?X(7~_g8j-&3cdO!zWY?>w+F)=+$@@)$jjQqbFZ06cVX{DB{ z>?*6H5y+x1dmVZ*RGB3%k)JBg$d=E}4kO(05Ebb(Kkpgis6*69mce73D_~r1VEZV1 ze3v06w9mxSE(d|AC5ZEZRIJ+84h3fV;Ygc72j_Lspn@XV{P_httfoD^N;T$^1M8Q( z9Frfro*Q=}I|_Z?`|--*t&(3{MQ01L9I!|b^~p;Zs|}!Ji`?rXRChFay!!VnYpJiO*M$kG~e6wY|yM1-Ra*P4b$2IUWC=2o%; zq|`kO`5{;Ce_nqc*AP+3PLWg&s$`?G!Dbb8>3D!@xM>nEnzSPAw5~}D6sTy`xf^{9 zS^XZn+CH8j5i(8rA%V`V;U3G-WwQdJ(V>Ggzwqow4(aLxg-BV3EZU7Bt9|arYzjHP zABtq;D;yzj)84kbMqQnus0>ICc^-c(Qz6nv{9nLqRbmCD>!oDQ^AqzaV0svVKRW}R z7|jDO*$m6IG6s>2SQ|3x?)p0bcy*&CzmhR1-^>_9Ohwi7aqncv__L%~i3(VP9M*>J zN+ft9)LCj!^2X^NXf9hGCmpkSDh@c9!%k6>5VgfaP!=}y2XMi=?*37E_060~Pf8IE zr)fjxxvyFb%%()H0?Cs$b(RE3%tsnKRj2nMw&re=piC+(XGs;__}*USB>+}qv`6%kB3MVj*> zA3Sbz2^Au*E}F3D*Wf1_hh(^d>)}|MmgY;wmihLx#5$TcwV4WyN zn$8F0#AZS`$Fi`61SIRQjd{P5-KTaYOeTI3S&x*PPnEokgwCf7zJ!T5cfb%A8vsQ` z4;7*%GzNrZ)~mCVLNc@HVI@@LIbIKG2)4skNIjXEK|hX&$egivh2wu{UjKob`|51w z+MS~XQ^b$`gJ8DJed$1IphCXPI8VU98jLE`+-GxDH&lr3_`dsv1o`pZ@S?a2lLe}H z!7oNbROUsYUUOmGxOqi2GO}2P_#DO->XFR*ABAbb&HL@fx5wdoG>xw1BfrjVK+Nzx zW(RmQB$W9};US@ar`-&IhI}<=BaK9bsMm!M@jVLxl5b&a>i>FzGtbh`&brtvKT0n| z4sJXad%(jerTZ@a7j+vg`d7y@RPlwHdgd=+GjU_ivM$|!(h}9?esy~?VV7Cp;C|S& za9b(Tv_2{ZpO!e5Vb^c1u6nZ{RxbJ!mKnv8nk{PoVN-xFjQfFJM$K_kAJ7RuYNQlV zbMF!rw-vU(>~aI)jfFc>`d~)3LXeb8clNTzXVjcFrCg$rP`0e*QphQd(zuVb=8XX) z3gb${&=#n)7C}kzsbfC z%=_mo6&zepzwJPBg@n>DO=KabgpsFQ$Qjdeipun2k%EIWvjjG2g>fZe+bmsBg07X& zOh%MC+{J$h*CC0AFW5k_s5vc>+$}=?9^qxp!noofnUAnvHgET#=4uK;XI=b7j&80g zEG~qJY@M@lmd4vX*V^mq&+3xe({D?@H~?ew_qn_92V;-)!=9@v(!lr__6!h-KgQm8 zf^6k}2dK@VMe;9Uqc(w2gno0O`u4(hxJb^ckTV+8Gzfd1qDvFI^J20i%B6d;5%1+v zv)G+~1sS;U0f!gN^K;H*9y0w_N<~_4+1jJ#D%fyLaOPf?wL0GOymddFv%<2+8*g^i z^uv}5T%1uMg6|j>Ohh5dxHz%wuG(dXndFwd;CO-)QKkx=RCsm}Y(GzxPJ2HTm?DRR zJuYAyJuG?3F`7&t%ZkFm7ERQ{W?kR*JMTGQE5ktfqGeb<3nguwPZ-Cg`42$C2Wbl2 z?Owm7s>G0Q3K?kPHK$4}GR|V_jbU8pP&w*oY1YmoJQc>R>Uz#|K*5hpw%Yy$UBLk+ 
z&T=mGhD55=c(6^`gOooYxlRSKs}2VUpzXu%HmyF6F8${4^dVQiytnqYx$f+$f;f0) zMIHwXfTqG&`u;?f{x+v_^~@P5lOZDCi{m8hOH_95l&=fwExEvvafr~gdq>Uf$q<2j z!i|Me6`gYQQzavZ?G9qyUiFPPWff*3Cw zvnZE>o^%FGBV5@omRWU>la^Ugm>z}RJ*3Ci^LDoG`=#PpvX^83fvDS8Ow-}P$Y|G2 zSY2$KTpw4mVhEGcnvU22GZZyDCF}6itj{z`wo#{&y+l=sZ5(_czegNEJGIc z>vqJAqWO;ixUgAKO^F%jM_E=3Z3f_(0I4hH4=JS#au=kI-RF%c*!1>!)0y0pFf@o| zG+V005n*(kxL1JKPH^3I4(&5_S+{Tqo&qH*@IdsSlRU+7ufme6e1`Up_!x;-p6ZAD z((h0U0q4PmB7*Ad#EtdcwtLP8!QI`P%H-@;7O$g2;1Zo+%PfF|#j9V288N$b zsmh(3v3+IJL5mfWBCsV-mhzv&y)6g$@bXONrI8ZX;&LP zi>iIQ?tz@`+0tUv0w>DR%|+wXETnd*-eL(mZsV7_^g+gOV_m*7V90qRUwwfO))<<# zJOkT!3VTSZO4X$rf>TXtD!gKLfYCqxDMZBufn)BlU0s-ANf_d=^#qp!zC{^XTVbgIpLCI12RXc2#dem%QPlB-oj#m(7Asw)2 z13f&{8Ws<-T~sBO9ouP|6+V@Sj4p9y`7k?IY+=UBL4V7+iI+&iMA-vFqC%rMrogL= z=P9AGjd4?=QT06^h;d%Em3wdV9}^+W>mQ7#YyS6`TEy1$>`AK(#&jCNoCh1?4;TD+#rN`tOvdi4O^~+?6zXpqtUr)Zv&}-N|8)I2(~Tl6 z9o>#)Z{#&hofZ&w8e(g>hkc%JC*R4)SF)s^wMof_MhZM%&!SK@@jW1w;41qp7my{r zz5%?Y)p{tMS-7lK!=&UZwL->FeRiw~M^^oTUk*-#iEtB-4X^kr`N-^dO*c z86GnkWY{Nj4^UVgrZfN;EItt)EXm{_2iy?opzR80*%rFLsxx!__~St77KV1_dNt#I zhTq(>#vW#?=z>nx+Vs&_f!WUqtT5S)8}xB53zof+NKIeWTkRQ})q<(FFczksOA6)V zn%B$Yn#YCs4Hr4napNJNQD5I;qqg3;tr^;=LUov21L^^rSAK(8Papk-ko~S+9VC5> zNuan7%FsvSA;Fx{SQ6ao{V}q7Gj*T+Ihy-f+pk!dZi^|)1J>76-<%k0iWJnRuUC8!Jy?Vo9=R`&as!F9 zt8c$?K&bCrk$-d8ol9lGQh`iw3r4M;=6|W4>vCU%`4>= z-{9b+Ojt20;R3v234Ma+_`EoI07e|`kiW0LC0YRxLXVJ*A5uk=Fwb2_T*kd_q_46Z zLnkH@J_8S~YDvw$Fav?b_!-~X3$um)QV{eSJZ0Z6&YEGinsm&_NJ%%*jX(3MyB#)N zJ5(7rU{m%xdGo42#njgY_Cey(nF9OU)b--%6mAr}L^xTfN(oF~FAnVCchT32h4ZTM zwDp2+CeIakm^`TzD8QMxG2CRTmi62HH=G?)xCQqxTa8Bk^s_5>1UPgL-`Nneg;Q=< zXCaPkIAg?g0&&wB6`WcZ5U}CwEvD}+Ax|n8vxqAlyaKwIo-NKJs-E{mr{2pWhHtuC zPB0BkSVwG#R+}9N)tHkT@^K1t6;ZJ2%7vR7|Ruvm^A0TSfXf?UtV+Za7 zOxIuF%#Ux!Y(0h=Go&82^OjY6_=!?(6jR`6;{G;4E!a$G2b`+cV#9l_&VRSLcI>j4tI!%p9@HzqIPO1j0W zf9kzS((1Ts1gghI^BZmC6u(W8J#HzAn&4fwP&N~499myS2l8YB^*53aQIYHOwmQtc ze^TQ;0XjGAC^o3tV|C9l)X5>80yDCPulGh!Z}_`{g7o{oLOln!)cDl>>G)^ev-&;! 
zAaVnbQb?=k%u|5rBgJQ_v?9*DtuEBi{v1d`H2Y%w$qj*s*0(2x%hta!tJ#zC)=|}0 zxWgR$PE70f-Y}!8z0X&*uSy@GTvgxeTj%p=T`M1L04eHk#p1w& z>+-|b)&-Nn>+`&0Ak2$}glUSxL_=u;axbhgIMqG1HytW8AHHA4OF+u$v=+pi!^~0F zDNy5aqMLq5wa5~)r4<(Gz4M`L!#ZUEOPfkO)vVNeS8`) zypZ6@1{&b2MeZkm+_1g3`*1s&tYzR`%W)Tt!uAc_FpnUpcRI?tx1_@|`q8fuZ%PQS z9okHDS$|e>ZPlJq9wM`2;j|Y@m=dCb6X)CwgH(sDz}e8D=J04Z!SGm(XOHOc2cQJO zC}1hetvr)B^7NW{jDsY7d4l#rq@tcZPA#H_)!ahJ%Ep({w8ja26q7L*bar-kO&>)I zfSD1e5KM|fgS9x3%d3|JTv zjYvKRFJJQMNDB!t@71iP?Xap`{r5%ZG$D03> z95n2nxTc*i#t%`~##(KFaCMLUfF^L5a)7vzwt;}9@0NNU#gx_2rD9+BXCWfvg71I| zkYZ+C)v|?${QDYd9r*^HK81%Gf|HJcAp%;5^baw77K8B%f5;NkAM#ZUZ^b|ly))0+ z^#VXpb!O_D0}D0Ryj7jYybtUVv%~zKJ(Z{LKZ`byF>I_k*@E;wb5H_~dOhap5g7P{ zIq-ks6~PcWK+}>B8DrS2Z+UEBqD0jSVCjss)ORvNsio6~NnZ^Lnf9^=m^&(bugw?~ z@)06=jqUoXva%-}NQypjuN&6i1Ek?PN;HVFcW8`*a|PPkJ1{V&p}qY*5pA$B1kHDM zS3f{vc*7tuo3oI_&tUc-7-fd1*iiqs=VeSAbh#xCbre^nu=!>|l>Y5cY!rE#b zqV<1ESrU4Dk8v0)OanIv>>x5a8uJ1oD!8Uf5HvK0L3Nd{nOh#|#4A|f%mA(^N9u7- z+Z}3lkrG^1R`wcEAX3WpNeS_O)Bse{ItrJ~!a}^x7%RjnWNT#R??o6KSK{hBIXOKb zpn?moVax{^)mIyN0bs`a~YgJYG0c>{Ee06CFMD24vhy9FR@o9}Oou&dXRahaBN2O9mz; z5EbGcBA^ob55>}kaXzF&kr=G`ZAU2u1xpmMcFWj6vt{-|h_~hU&_at0U_}6`Q$JMq zQe*CiQKfvhc}TuWjL#EP)7X84L`wl`W#R=Y;twd|Vb)8@0~Gvx$@ zsTQX|6}6|u(nu+tW9HK2`1KCyzAwZwoDs(w^Vc!laT}WUNa5mXka)qQs8XV2-X$fq zX@3x;?UY7?q5@WiLx)HiN)gsC=B zR3$@$(1|)wi+0SnXyTspvHX|A5kJ_!9I9JM%~nka7Jb56!@mK8DI_-L$t2C@{G@W_ zQ<=qB;~zw*PEX;SG@-o!Zengcm(QPvR=#=dB#qS3aZ^NP5lPNpfx-=tETQxN3M^4* zpk|3gAAoGV&P**Qe|}zcpISc69qK&Rjv850av3Azc43@rYg@ese*9nY{jyR_W2TFO z6}`rs1jnLN%c?H8%5V{lI)e?8)AqeQEVszo=o<3)k7ZziF?(KkccqcOJ+ zqG0M>Q?MM=@&dV#Ss<)>cr_$*IqXWu3ZK7WcbYB^XO7y{M}rQ(JENtE{6c?&ockH5 zYbt~9&wP28cz*_}JAdB1amM?ZY{pl-JEh4}3F_|n}UUd4;gaxVXt_&|94bL8o>FQca}?5NJV z`djMKmvfJ=q`VvK`QB-J*?#re6YlA=3$dm$VTnDM%8T#R<#P{xNRGJB#(1VNzNz)- z&4b;~H2;0R^}VC*-j_Y5-fb++>Q7(J@BU-FHvDwvSjp(0bEc2J{u$jAJRDv0`jO7$lWK$ z|MA{mp5PzOy?FNNyYjDcxh>CjK71#0{_5{*FYi8i7}e(Z=;cz1U*(&h31*i*h6O$P z9dg0=%(QO;W$n?U-v8eIq^A8LE0=V`T@n4wV6;fUY2 
z#Yunul$o^fJV)8jzqASUC9{J#^}tq=fA^$&hKevfgY@B&d(>r ze42i8^n3dG-4BP)Jo@jbK@t6=`{17+I=60hkFTFU=Gc1S%CmuYftC1I{?Zqt_T7l= z+V{GZzEbr@^V+QGJ;});OJm#4LsNxRKn~|ZnidcRuCFQ8L({;@!FBI)$ zYleisB7gAw@b%rDdoNc%{@fFAb>Z#HUI)9=YZu@BeE#&z%Qq?MpEDXf-aw&i;r{ zbVlW_mXCz(UvB%U89gfh;x)3A^7i4Q`Iz6xSr0Mu+T1QTHBw{jBov zO^(_Z{64GXnAhu9ZZ`dxy?!C->E(X|Z}v`Y+g@3E!sGWhx1$~{mzqr~r!0^~MeWm{ zw|BlhN(p?XeEp$M{2xbWb;Rq7R+kO?d;R#|+&+ztSl#${9Wynm_mCD+?TYsQP|L{I2qs4?5HfG(S6d{odZF{X_Dq zI=Zi(VVr6EIlTLIJ2H}f>s9y5j&t9B zm_PV=QR83l>G9!nwF@cd&OG~>Aem_79UIkqrmbide?D^QY{2Kg_hQx}eAI7#da!o- z?AcRZyG`E-KL=v{{`$$DYzbMgt4c-Xzkc*D}4&SJdL3uNw{&*8MlSZ1L;n{YQH)K4D#V_%i(su%fX) z!+0(4e~3KcJ*24!v5I0@O-+>E}f`vZ#$6I zRrBAClfPkZojZw{x$cm>HK-GAP_|M-Z@*g>VNy9faIybh)a9!FUg_U-kH&}ZYX0;8 z*{!0#ay;J5SyAIq`R6X*D?cy&T(TS8y;tiSH|yi8q}wg~3X9q=wqK7*4!dB#QsJRx z+Zca$+*HqslyUp*^}f#^7cK+@tljrkfq_;-~%;xmQW{d3`_k^Xb=f zkDn!%c&#Np)4YICjH0#v$>;^%{MXp6uOGjD z?|k<2`GxmWjfVaE6YXxFnrYDZCZYQ2Tfnu}YfXmu>5Sn2ru8^~E}oB-UE2T1?9t5I z>o24p|2#T(`&5Qpx?{t{^-~l2McR4LtX{7S$NEux;C1Wp#}Uo@jL-d7am%mlX;#^ zhrYb~wZ$>($oAn6uN?mRmv!8wuhOsTk~*vk?cP)v?|4-pHp{zAwd-NWzm_Ne<5+u_ z?6=EzsctIrU%C?U!}s+3nYq_)h4p2%-eug8)QVzvt7Eo#?xd0QysYqhHyR|;S0j@w z)fOL>BzZLX^vbNhY)aGp+*5Mqh?^ndaY;=IwsOaPm9o+*xiaOH3bPFD7X^7q&7{so zsJk?}`J?n|c9wS5&8z`1TtULmZSQN0y93UFDw&saGb=O0AS*n>z-7SkJXb#ms<2cV z+h{j(o#wmbBZL+6l1pNkO#*b8l7aT6LM>&q+YUo*g9C2vp(W?ZB{dkQviYpnVYn|t zB}wN>Y7qa=YG>tTYG+H(&~BbE5Ku!~m+H>JnA4oaq|5rdt=9z*$@9-@|p)}1Cd&8m3L6=dYPr#WwKoyD(Rylk` zmzL?5MqlB{P8A+}m7crakD2qn6<~qW0TlC2yX{J;2+F7+<=t;i%22{rx;6>0yYLDU zo;nBM(=tBN?m%6Pwz{X8w(b!(BBr#a0cTj6`VP7a)Lq>_;^boG-Z<*Pg6c8Gl1%%ubiibV?N(CiC z(j&Kmj5_Z!BA@X(Yiey?VtOq>qH1PQawECJLB@^vd~J$0xu!rvF-8?k7<-klytOIt z=vwD39dX6yyr%=c8_B@mvQ;w?3t56k{;5!(^4arB4@r2wH8Dlpl8Kv;E!*A;hdz03Dn_~yZL#`dEs zCiXH4=Q+GC|2|&V{RRmm`@KpsaT@^$Z$MQ&Bo|Gt(mX;|sYjDl)T`^W{G3*!^7;hE zciX}@1CmC^4`eJxotWfyEmTmPrFBd^Mfpe%k;f%ooA}UY5Y2&3(J|RFwLKuLl%M4v zHjQHkJqFMzi+S5bBZ@XcWjw~vwRJ}KwL#H*wz%AVmC|Rp 
zW!5_-3e8!ibcjt{f?cGPmho;EViynhIgYe-Iv0tY6%803$OGg`d<8K7@+(&4yv{DuInS&UIvbfULX2L>!(QB_ARG34kn~*96S@faWHbj%K^mpa=$Au zAo>ZFYAWrsS{A*AjVLz@D(xX4Q;L+d3B{nGim{$i#qf0=4LT1T4cY{Z28~chg3?15 z%?!Bn$;0ua)gW8i$V@0`I!K{$y*kB!A_Zs>tFM_6zHO^z+vJ*KmY_O@Bf=*j2|T$g zqN%^-jhym!zPmo(9nE)#@!cQS-~Uw}O;#wX*WPsX72t6~|D|Z~6E5OnssNC1d}#uh^XPi_gduiH{gP1BVGzR0+X_lrp6qSJyAU741kvUG%5T zVcWEHXZ(jT$4NmS1noFsjt`IGChHytEV)A$xa13r?kT5{WQUy_H}>77L@W~gDMfE$ z*?C3P6)68~4cl|Me&O8$rp1lu(82{@6=EUM#}iq;(mWxF(lIb^hnwFBg4*U|dC$YA zO14>;m~S_-WWWo`J)IkSJq;T}?X_ z8orL1BK6H3)2EIPPw3w|#bC5g9H9aHT^0a|jLf+Q| zJanm=c04q^q^YK|Qri?~*BMWOraCRT$fmdzZL_F>76n0ju^A`E_z?mlQg}&m9=E&~ zg(lb7g zl!|d2OQefadPVpwj?*E3urM_L_t(+ATV~QkTEu6arE5ni{=v8p`CM?d6ULwGUyo9O8Vk-nHRZSY`uA z1B83X`rDt2-7>@S*t+&fs&r6b4j0a)sIsfi*V&^_5n~gyDhqXRI1ZqxfXHqb!b5L;D1H0e2`NN==G%eMcrg zsWaX~+$HKm!n0AvBtwR*B1;2tI(?--2~hQc3y1bdm}mX)*ZrkH689U*vUt50;ppwS z{s~qZCeWh>r86C+kUutD(vYDr9a5WCu1b$YNR~n)L+n78*&1ncI=T84h^<{o-cG|S zI1LRH?-L9NAxpvTgs&e1gM9L&LjCP!BGdHq0J0(#?*rmfo)W8!GGMd7!WhgiK6Wh| z>_6^(f0kS(x65>PbNazxWr^r=FqirrEv3F)NRiy9?;gVZEgurms>4=Ft-=Af>&F8D z^yO;&$=M?#q6&Ag!Erc%<;4qGIxMix&FzzTYYwjPZh|TaW=!{U)^_@^DP{csy0MR#QBL|X66L^*47Te|odx_n4 zwBJ3QnQ9`WVu|owyT{FtH|ZDJ$!~yVuN#`22L=j6Fy-O;W@(>O>pc+73Zb?k=F7Ss zEnW8QN~(qszEw0xG9?(i|?&cPXB<&x8k~G@IC! 
zk|{1@D>EPZ%+4oG4i6aba|XA0zDrfz4SjW-T<`f z)$h1jE&!yTny`~>kai_&dm%JvOSUrNPeJ3BHHE~)&m)#Ryn*9hWVBG-&O)ed?9|}H zy8_Wkvsb{9d?(apXpKo)_GPaN45G=V&CE@R70WBrKSw6IF3gxoFm9+V1zlR!1vt!i zHc+AN$UskG*G?5s)~ah6=|1>nR~eu}$g2*X?L0w0T>e7kVQIIS&BU0^pLDRN2x*?%`_{`Dm@1`pia=$WM+YuqVXU@%5D}LwaD_Ya;6!?}gCHtSiT#+0~X4-1s zl>JEFs#!<#jGJon%#f155T}Ru(k-xX^WH2PpsuT;(|s2JN5=R?Y1AZDfwsn#_srts zMD`9xz*HrFLwDX~>Qmv28?p@fOVk>#(_HX<2QO2VP#soH2!0(q05JumI-X(ZvthN*4x zB*+Oc6>y%TVb26Ks@n1CrQUg-sgg4tN;eQ&Z3dvi!WVzjG7NKtmvB7`M!nTh>W7(x zEh}%tA}Vb>#Lb~ps|DgyC7$!U@EWQoLKoXY-0f8i{Wl_@#LeS4%(FkLH{;RfTn%;% zsY$1oEmMfop&)SzXN*XJugtihPCK>wV`ulR8UA$oDX$zU@Pz*urZkoMdIxjuB7CNtU zdDilA-SZOAJ1<2EAMyr$6kS=bE7=@{eO)QOz~W~A9kv>U#BSMUyV<{#bfZ8!d28>R zwGIKMC?Lm8ACD5Unq&#x$6i)GDVr;Pb80bLyhs7bTLeGj^&Bh(J`bN{fLl8nU0zXM z0<6rb!+gG!%n-FcOVlD`k;I+uZh$V$|BEQW&8JJxm_1i$G->2$P(omh#&w+Chq2N) zEMacfnSLP2Uw>}bS6PQ}h(a&B=1O!vF3ny(Dc2j5x%TcF2ni!|c8>yR`uX*`5Q}#C z-Y8HEF`TBErV2sh$^i1cVR%oZ(k{M`LcC8k>SZkEmGK#eIkLS$*@}&LvYNsK(@z|{ zDiE^f8EZC@g>G)p884c2Zi+cys2DyeUpK$Y`cR_=`I1cctV-Ir+C;jWB7l2?Tvhf@*wAX3_t*(8LyS!tOz|48 z?Y!H&6tS~OS(6C$>;zp_tagYJ78?ql< z#folesEeoHY09@e&dA^IDc&1Cdof%3CMEQ<5<*lQ7Mx9+50I{d^IrhR(Gys(_15+^ z_~X2K>8o(Qc*eG}P#ib14qtHu&Y16lB2_6bHjg z8mbbn;3&uZJEou*!X;Bw2soqjGaa%AtRQRuB64%ROU};x`k{*hPW)BGI!dHp`A0U6 zb5L?#n)~-Sm+A(xpXvJMbf5b9mqrdi+DCzLTYMn$=Pr7wQxYkVL6I^hIW1`P7KB=1 z6{hAc3}RupuVriRK|2v!0-Ml@p>N4>ZKwNAKQGAj=eaE>J0tPaz011(J|>?yTf2AV zMlx=LOauPx4s(#g>7)wkei5|g+-ptcRR?c47U#-7YBRS=z?Yt@H3)};d>J8J3fhi9AC#{twNwHv)r+P0)a&Al@q9mGqFfZi1CY(c!p3hH)L0^?>n=tvw2zEO7> z075ET&Nw^5F_-4ok1PEGBc(IbSg7DUB4Xj+DSmDvfT1O^out4|T*t6h{s}P#xheqm zE-28*BQ$(dJa6UF`2h~0g&s2$h*l$3ct{q7pZfdh2+6T&@~?V^4DPGj`tgd`Trw$) zZyFkqcaGEl_qGjo3VC+!=}?4}&uN(%=g@stiYMkSM0Suo$iBd1qyWAU2S7X);w;w@ z`{w@Xv;&df%y8^%nDot+e=$SQ~C)^K)%v679|jNH8i z`UPvXEK>MpM7Tc^JGU$HS}w_jUw5s>VO_B{6^Wedn*V#Vk00ebWsZ>aGhM&UMDbrF zBV~42eaQM-^oH(2z4ax3a)E!9^KjtGu(1`h`4$y z3S;N9nSr7uoxWA42mtqFwC)rF#J74|zM*pvDcRfJB+9wn$?)A$xY1`Dd8?k46+ht1 
z+F3%4Exg5s-v~z4d<)1^eaJnap$d>W_T1(7v2ZC*C*R%6!ECgLFYCA!E#h^- z?p?~>XFX|B!LPNnVaD~}ShRF6i(4~p3hENM-)aB^xX^pc-v&t8<10eg{AkMgrUe4s zNxeaL4hG-ByPW-zsX{39+wIDgc-Z0e+cQ%+PFBsA<$#!LvB>yl^FmtC==8fV40<`? z2K{ufgi!X4eWp4`%*o8!cjvfdNyugqKVYLzeL&xR%SBK@Mo3Mn3g?b)y6L99HGeLt zAjBg0l6yJxc&n|sQ=9Qc>kKIr@@(l%samZcrDTO}Bk{SlRXZ$VBefkY+}I}{{Q+3| z{qyG{4Pop;I;1qLdZ5cbQU-k(VXNr~gO37B*(yX@G z$Fj^1eqG$f>;&Oh^3iix15AQILNx^nA<+*vK=#y9hZqA2oWmeU@IG;kJNRzmLQ+MHr)}Ss5q`Jy>8U?We0g^Jw1lY7_t3~(enxjvNp5_3^j^m>=O29f|Sr&yNJLIRba=W=;^A1 z#M8!hZkS&15Zv}2NX3KU2`9NniZ^DEvf3ykCbJ1|X+I1Rj&-yGgb`0tZq|mRMy4yZ z20`<4&z{!=9(}S>%>*Y16D!a`nUuINXR;;`kS^Xc@%N`n+$mziC;Z50)giMcd?%9E zg}0-k8WHHfEl_Mzy+$p$R;_W;Xec(r_{Np>9-7?3WQXdRfOSur{(XQBI|wSD283)> zKSraa61Ih7m#W=M$ry`VU4;0`P=Nl{o=i+XcIcPPt0{RBbSk-oSi(l=1HO|mLtJni zbGuIZv4u{FIvwN!t|Plr8#82CkaZjeUZ@cVTmfCP-W$u-tzu)J`|9hY zyN~iihMFrmhkhl<{8fAPW9g$SpV!_#_HI13u60+W?x8&NQMtm$M)9;q{y7H?IxhZ? zb^Y|$`?l#J635nWKR$K$e7$^+UfQF;7T~yp9|w6J`ek$aZPgQFBeR1ODI)9f?<&lE z&Y%8zN#cdOn7mH=!DBifejImVtpy&akog=bPIWPSa9C&VLP&Yei{hK9@^4Sdi~-}9 znPGS*fb*GpkBI9GNxV_n>pJEx?(oq#`Sam_b|3xKe_BTyc>C(FP#~S5;ENLd&HC;e z{6D5}-3J%!ZoNHuS<9g4Oa6Yle-qx`Ico9k#A!oc4U|YCIEp;`O7q7i{|beq$crc4|0pkofiw zso=P<&x!x4_qb$AcLbh3{YEgq`|!~r#q62JLl_(V8LR4p%D+~dJFY#rVCk2W*#1TG zr}m!7-Ku{NOBi5Yu5Z_tf6RPqb7FL+vb+Dqrxz*Oc@N|?M*o$o$}6q15II?#_x6Zj z&8*`4kNf)t3+1UNUVnOVfD&`%?Taf{AH6g%E&5ASNoUC5{9UkkQHbO}f8BZcsTp&6 z{O!q0SN)0tXqcwTgC;u>eG^=9#r zX_ThR%T;?c!!M#v;E@H6cW4(%K0j98$AN`kHTl_S+M=@DJnjdb7l8rXFkO11K75$5 z6vw1K3r|$VBWtAK9;#r+D9{iYOZ_1!?OITu?a>Q2RP}P~k?D!4Tx@lYQYzY)U{cI! 
zeyh|fJJj9i5YBk58M+l*yybXR(+g zW?2W1t-jxcDlvUOU5RBtY15+-I?R6YJd6)hB%g>6)vzkvm?)E`qNJjpf_&g54L^>~ zmXBm-X1~d$Cj9fc5q{C^k~mu?=DqbRmGPKv-wZ`;jt|7@{Mgd`6>i?#v!)a%79Krj zN77RfRU5+1CQdWL#8%~xuEwC7>A3Kk9|H0P5>K#2e-L^f#57~C4~Dvw>ks_Em(1VCaR3~4nR;_|=SRmf#3^pk z7aMHhoLfb4gBeyd{ZP0zFi&j;?^LHeyBs#P z2FZ?GQ4uLFzvOg$tiVUu>kzs1B&(7hXu%BwzhS1Fr|<+P<{qLlHbU&&(5k#|!h z#P)K1+f=Es!Lg(@6ieu5JiV+ep=t?}l~iIkh?BE{&v2)?C0jzWlx33k`iHB|(WNLG zb(>dhL*gT!vtnawFDj2^#m48xAN~1Y2c4zjRiGdWaUb`M*o}?9Sr7iX#Jv!Yd(4m> z+9p&uB5SrxZ?OW%U=`8PpBv-u{WY`_PgCY*%esK9l6nZzMU1@E% z{s;`Jk&m!y7I8OJ!sMCF{S}}hSr5PhG~^3AeD_&Yx2M0x6@U9N01-gZ&J++e&leGE zodOB!N$gM=-g2W!*o%d3>xuO7`k5?5vc4vbX8`3$+e5DOGum_PtxK=tI^q9-xlhN2b>c#lFGfN<9P#_&ff~wty z#p5?f1@M9%-sYd-_LY4?R73bSD!y%FeEz;VHZWm){qSY~icGuGMfTfis%{q zs%IGfokacKhJ)${ipp)YNThHZLaMPjsZnKC9>HFk+$EBObILx2sUtwA{z1i+Dpr`8 zR2EiUYZy)2^g@_W=S9e%bEx@=t>17wLK`Pb;?D`GyB$Z6x2KDUs$y9OpT#9Ga5?1z zdF2ZT66Gmf0#;=^YL29vM-6=%@hx@hiRu5ip~jEdKk~!<4SB_~J*R)(6y~uf*R@sV`NY6ix%06Np z!<)UX4{F+eO02Sdvf@bV2!F0-B}Et?bTk^7kh@cT-TtJw4T$pibNE`Gf@7u-#IJO) zbk>91(U?D#VGXGU@49Y} zi*k``%1n23el3=`;b7t%+mf$u1l`@pzHF%ckLhqt>%7H)Ay8avYa)VZ^P`)&Jt{sFJKM@!S6Zg@er22<&58BaZaE*YGsGy5_D1Sk9d{Y z+RjrD=jcovNIGlm6pUl271M9M@gH|j^WD-joINFK`s?PO)mSCb#a)n?yt64-Cf*^3 z8haXe-|1$JQmIM~ejp8(^ANqfJP%cBxoQQ&<%!@&D>E?oR@bdqUKHtSc1R`f*I)Z7 z4VqTP&3VERyy~#sfMv z?qv{qjrV>&=hLIRVtYw#-Y=4p<9asZobIa)^VTy#s^kpNXm>1U>N!&|F~zMCUGkq= z@(r*azmd0;Lb|Z?>4A(fRvyeOjb(lJ|_{cr)57KFPy#sCQ?$T1O|iM9ZyR z@B|fsICS_Og`wD92{z2aE#8l3tk%c#GO^iBAY~$-}I2)C%^PNBErkx*Ma5d(G z!DMN1xR6;@J6Z+@gNlFQMvZ%zX0)CT^%7K2y>~;hvzIpFD%tI?45=+%RgF-f7q0_h4RV>QlWdN zn?7D)G?^kaXw+*3g&lRD4!^2(b_ZQssbW^UN+z*CO2;{DB={;B<@F*x!Z%Sp0SbRW z5I%foNQ{H~DgA1y28k)d+OvyplaD6dO%F(QkxHc1?{$D3hrGP7Utyl!8t$(`b>;HB zx`thYbF?EZw_8W}A_#MGhC#?NT~<=xM_0wm$gNf2<`k~~4j+Fy-qpC)M@+z}w4E#{ zFB#D-eb9%GwZw{gVQd5t5_{C3vMthY(Uf(3bGU3+N!J!EQ0gwUKjXWo@T!#ZT|k$76Dw#X2l%{YcD}+7akvN6N<^kTyF&& zHQT)|k^S&?e*J5vOC&3-I@2MZn4_;O0l$(mYDm0)hK6pW>(^|jW#L2nK7y?06Z6O6 
z-qzf)K0Z)uQy?Js64s{T+Js9|ZmxS9g?=-y=Aiy3?`Z2eeur|MlH!eGXeP4RXO<#s zj9?AcLJrZ$xLb9?K?b*iDi#V+o+Yve`|8}HUJ!3pP{he>%Ut#KXAC)pC8x~n!tEsU zq=tT?R`LEu&>n2~$Tpf!IdZ7NZO5|b%DjgrM2)5od-@fdcvtMT;+^oWgjk_h-Po(< z{`ExPwa8;deQ+uL&btAQRzqH>atH!(QNQN^mq{J>qWGcTR$En?Tb)*v9$9`hFdtj} z*%t9e5>CmSbnuc=v%qESZsxFEid8XdKHvY-TIT?(*c^9P@#&8b}uoa?r*$7 z0|R|3c6E=bXE^==d)Q1g%{%>YY*_s^-KaV}ED}fhWE%0Ql;h-P!dcle%x3i+a%w0 zkg-={Ob;Mj$;Jz=CQavxmtyR{jVrvZSAO1LvFjAaO>eM;q{W!(*bv@HG@Lm{6 z$2E1t@2w!Nfls}_E__jsZ(AOUb!#9mWirk*CrrM?A8zWWnBx?Vo#y{uDNySDqt;nI z{qhabl0Uz0H>qImMFIUz$?7Ng_1On_#x|YKKR#AANT#0&tzlV#er7FU2GXACK`gCg zL=UE)2mzf|D19)q8H1YHY(bmRBHpg;xV)97H8D#IX_Jb}L(@kBLFURHb}YYZ3}bgT zMrCv>fp}fE|I4#F=NEY7rruHlx6aLbp^n|RUI!d2y)*DF9jme6%8);vg73_lI-Y}( zm{h|)Dm=&ArmBbmIw?{0p3G@F#aCU?#O7hLXAy-VSgvcY8_4#YYDc(36Bs{nTMV<; zhQDcMMx*quik^Pxl6)aodNhW{K@bw&ucyK%i8O)!1MNOeHVqccfwT|e>5WK z;g(Sflfo!1J|&itbzMnx>ihqM;o;~`3Qb&VaW4g0#_6v{BhD99gNe5DpAx;HyUR_) z@Stc0ZUFi)t0K^l*phWwn#BpY^v6A6nG&)}IMD{-rH{v0v#ogzgsVeiX~{a8MYh1r zgCtcAn2tpDLVM4u=-P@)Mo;(&$pug|nf|ZT^(|7eT@kF;2Nr>-OkKPrH74KMp|-L- z3Dl20ffVTIbk!xt10$bn-bDbF(TUR?XJWWcf)0;gpQ1ku*Wxu_i)heX4HE53HOI$@ zNF(&J(+~g$uHV2s>~5aYVv+fXC?O_qXiWkW#*T$;s(cc#o-qaEimkERaVCtUXtSH# zqhiWQ`ym7l^6mdZ6|hO@jbUsM`UX7WtUJMta2vzKb#jo=HNc8I;zIw{gX!t|g}bo8 zRPxI%qS;7os0&Hb5kZMcy+9lX=eEeo+__iu03gGa%5n#Lrn$H2=Bf0wE_2EjLLuI6 zb$Yknu$0zvirx6>*8@9JrS0mu%#@-p1NJR#_DAvT=mPAUr&65xiIPFGnaR)g2Atc7 zOQVF3rpfq6DPr}P%`npZEnVll4_A9+V_Nv%ptZ>MO{!LMpP4le-oS=miScGHwx}bk z9TdYMA>e@@slfcpL__syG*iK=>jmf=_y|8djMuoFJ7wg{h(ysMBf6j}>_3VtTvG{o zq&YkGaI1IS8OK)dMsE_hHL4M+0X*vnI-)p^ zNDxxk+_n*2D`Z1*5KLF^v;s=VG+!B?L*;y0LB$Bk3#`u`1+(^!t~ylrsGi}C$^@)Y zQvQ*IP-^c^W89P`HuG*zGLZ5fR$=)DB-gC$oH!_qU1wPm0 zrd$Yzh^MJzUs^RwjYi?kDVf5&dBWw<7*8taJxjipbt|9+!1K3TfaCT&8!a<3QNSL? 
z4*ujI`>$E+zHf3Xe}1?tfZgh*R||FwExA%O-kF<;@op|({Kb`CP`FTikFq?8nGCD9 z6HFmRA^I&2(Fk53tD)0{HLRNk+-F3^7{Qf$Qq7b9@R^$n^Oky%bpCU;@UBV8@K3RS zIwERxd&w9@0%rb!V_jT1g7M{F@p4Q>zK8&3R`a7T87V^g6hjN9_{tL-AP0x@8qHi z+caIECmz1a*E+3O5u%~tt_oJ`syODZiXFDHVA)aJG$iaH$#$vba7m(zKM{{5XzLu2 zjC6~ncBo+nx)P=CMVlkl3&qizXzin-mdSU4M&l>!qSSvr-=rG14{a`XlF{X0b5E%P zcxuHl2#7eAq1fe}swp9=2F82W=c|I*Eurwxv=pC^RFM!65v!nve{Uyfr^g9@wR80M zz9#Vp#8X7J)45=uu)>NGWL^sXA#IsWF+<-Oit0<&4=5_JegW8^tg)f1UUYY!x`~() z_)3qdV}UdQk-xm7>7&lKZUF95Mi(XL(V^bb!9|wK$G|MhxsH z8;X|1t=0D!HQpqx78B-AxSrOpP=g9zPfm;u$40DCAuRS5NjL!a?x*CqxX-a zmX}Aa%NiXVQ_@cFCa=6OeQdIYj2C-3y&0~SHD}@qtY8z-b;}v0t3nhg-Dk3NiqO*` z+^(wW53Rh{~>!ccdkp{Sa#;YDBdFhQ|;MX$J_`q5KBx83vV6;}T|2M)Hu zre{!aT&Lp9mhxwM7gb*q0)(sN9`OKLKlkO8I^%7<#ly_yI+|myi!{QY~yG6NzIA&?_ z>NCdHr^T~g%S9ut(-|ys!*fd-g1^e>!EN#EfRgh`ijoyji%x7o{w(I%9T;2T1=nvkSjIi8 z!y&5h$UB_VVZ}-tDFTLglxMW40<^jio@;~oNEHb4fuhupt|)Pcy?u=!6LAQ&C<1fw z7s(&3X$uNbAi%M06Iy!#zrli$|F2XQxe7S%1G5=lil;85b6Tdw{3*kq+K%&>Sr+4I+`oGuR zpydk%PpBuIla+Q(!;;4ooZj!2h&K`-){@IL{%L_@s&85JaH}U}@FIE+GX9I9^RK_?VE~`4didMjAm7q) zK01OGCpJ~gq&Q6ri6uYZtZ~uq>@>$2>HhalYt!)>A*M-r$R3P&s`ACW!wH`!;+c&m za?e@e{a;w7bSTFMx8l1--7l1OF8(p;^WNIsx|$Hmr5?L{O+{{MS5Xqyu%~)br%f1T z5%VYc&-+bVs7txL`cEpcLc0U&f+?MaSk5{j??E=vU=UmkI5+QJKQLN$TU`n(+0S+> zcLS+HR3Sf@TSUDdn@%!3M1uK>mr8zGwvu6iw)<-&_I@?pTAZD?Wp`mo#}Tmnt&Ya| zHV?#kl~9s($X6-I+_9Pof?2fs4c&zlA;~%xP^Qtp+mR<~e+b70IZ*fSU>i@dJ(0#W z@#$3vI@OacX$Gh!*)ox=HI{t+0QtJ+f5K3qSx^h?d9USZz}VCPcXDSY-~{(QL}Yug z3^Y?ydhEdS>ZSStKjJNWTW@R=UG@{whO53e5EKb2PDn0k^c9C0se0w;-?N+j_W3?T zE{rsTt`pXl+0QGyk0QAE+Z|e8WD%WrS?GtQ)9B^6nLXz*%et~CEk?(O3(-S=xw91|bW#(N8xj@dx3SdgtRE$hG7c1`Rv32`H7 z_VI4{^MBe+4fSjEFiB`5IjxWEL*IvMhC&JhW3&IO=5Jc_Q~^;XQ`>7&?uGvT2S96( zYMH!r*7EGW1K$LtJf2}>Y}jfNCs!OA@Ah`DV>uYj;3PRapUmn=z+FLB@jJGwkI%LV z-*IYnF{;nz|M{P|%?3*!CzCfNKnF&vi*#`-?W(JUX{TgIxcau6dfdCwv#%-*t4#`e znvt8@BXI`?kCH|PIS2>jEeVmQ3SCB%f5(u;`A#%GG+BLkXRlo?qB+7Pd||zghiqUK zbG*WH>+m-b5#-Ik0^h=c>)yj|G^U|v)^A&dxlNuE?&XO{E?p7GA?`WK1{uW_(v}+y 
zeM!0nR~(?|h%W!67&SVZ%4}O+WmD9KPf*!;eN@`?<&`So-nmSOu5OwN8{v>@sORi< z1p=-^#$`yaJ!SDC4p?gv*r}yJI->a_24`ug6=< z)V79=^pX}?-k0a<2!W59Z!@kotoSSJfKX4BSvekB*@ykrqB48yb@-wETxJTC>qA#Z z2HS;is-_5`$C9vbq7(uH5^}Di)sc%%tnZtruc5+aUG?JKIDk%!+)}paisGdQ z*{x0m1Dz~y=Y2mpoU!e$O0EbY2itEGgQ6oDrG@w`L0Z>V+>_@*YP^dzB-fz#%{?a_ z6?Pm3AgEL<>GzrxXs<8$A{qeauJPXqGn=(y)+KF}V`!JdlE9bBx(f$e zN8BavgcOZlUwqYZE2e7llOuMmd;~BQM-1!6neYhMoH<~{bmW}}XNgAJjl98c4onK= zI~Oa7BtN^>pvw=7m}_Kmp>OTVJ4X*ww|&bWWWlf*6bNvlYX*U>1N}div zukhA4K5UQAqhc6^;WVD#$>*#?!lXZwJf{4L6^i!howIB%R+?f`juFQ@B6%r3V5@Vg zv+XTHU@H_LJ!cNsbETjq&C=E&s+C?)RNzBD_!|uv_$wAQ zyRVm9kGPU?K6&eEU2xz&9I zGYa%E8Q``>-@W?ssc0YBG40XvHtgj>%Y*Q6Kn-8P)#v(Z(w+my@?BQmoR#)cZc;vTG5y3)-xQDB+Vis6$>|_Ij9E~7yKuA z36tJzw|%ATNU_Dq!dy#)k0VlFiS9p}J~%c4wuuIXXhm8&78gZ#0#5aJ){-Vj%UB1S zG0+gF)+?SVxD=RGmt|<;C?N~9=XP$HOly|$9fo=r0|eBRd&Mo`E~|^hb9 zyY=MebRc%M$ZkGNRtmYMJ3_hQ>_TTJ>s(Y#Em1ZMFS4zG!vpOtd^4^a>GLQ(V_Vv< zM_ODR<43Xin)Z<-!p}?W(Zgx`c0U8D2tMV%TU zq{dAtp1U6cgscpTv+B4h-g8%?Y3o=Qw+72LZG-MwHgjvn;iaCWByaMTErnY%1~2Vo zo0da&!IfO4LU?H?cz4WAC~i-2R%4qIz`J>FLibv(QXae%?za2YPAKjOFZCuQ1>m<} z`CKIkywr`5B;vlCVkdNWY;~^xvoy3b37)$9L1Y^yxtgmwRReBy_HT8r{j(JAvisFs zC@u#U-JcZ~&7!d6G9bRg z;Id=0X$zqqFPoUd!kv8UR!+5DBJ($oNbGD(n=)e~CcVOCn&Mmzr>b^&(C_H$T^+Lp z)h4TMH=|>hM2@5q_j$(}m&F&>xX!$<_^-X#k~!#Zm~#Em=N+MRuKM$Qppo zJK&)j5B@>8i{QTJ-Iv794ZPapiW@etuE?NxJ{Di z@QE6q5z)S}#rW-wmBjl`i)I}p-62yA8%B85*f6KkTXOXz--}86@5u6e5@vi<_JBYV z3nX`V&yL`?h^T=Ea@`s|HIx`7&O@ud&K-jw$*w{*$qw&n(+hA&pUj2PP?@%Cz|+%v z$&FBptp=+kMX$=pkwkav8`!t`*e_Sx8gE>qZet4&px$J>#+doWw6}hR&JSXV%<}5C z=9zjv(su|#bSVz8E1|re{3+8|u<8LX?){jjzf;#X!CU`!m6EjxdousCL($gWW@BD& z&`-~202=7;W0l-qJm-x!hp0*Zo-)mE8M)~0C1lAjFT{E8O*s)uc`BRHMyWSwPpKSF zd-TE*B!`%#KRU<|)pm94k;3K`GXgmt*66Kqmcug`@g2ed%1ejc0HCRh z{EGV!l_L5#`Q}Yv(dmEfgHv?wJk;v~Hy)og536ud!eQ(`jwx=;#jOpw6J;R>(O`3B zXoi0Ee07$TRgcsdJn@;{WO|9BBp`3$z6ZCfjj2m9XUh1l%Gjdw7k55>%G^2a%I+>$ z*H>;fM@klOzlG_UIB|6LnRf+T1++`mxqwJ@L{qD*D6rwu_yL+}5_~Pw!M)Dwlkiv1 
z^%bYm5}x>No76rxE*D7kackbaIP!tW^j)!CV>cSr!ZGNA2W#t;p)a20)SfvZymMH_ zaqO`E=*p{E50cB}XTLV?TNG3y8E*cAUeyL6MQKviBd!kb6l_6@l?Zn+An&H*B=CY+ z^yc27xF{6?MONovoa{71XPkPh5`r@ zjjski8L+RK^uDskXOPBCNk?jqb0=G&$^c3d8v3V8Ewt83wiqbJ8KtI#ncaGy>6vm&Tm}Z3O_uTBpcQk?*@t z@zs$BzrL$XpMUp-S3E#j!}9bdr9alZlb<71W`Pv$P~zxRV`Qk2pd;a#{#VbES~=Kd z;!>u6t%teVu;baWA9-qjnDKqyX1#CZs2;-r^IY?!-=#r)=iF`j$}-i_={g-(WKQ;u zC%4f3=YNTy&>BuETyWlpU#85aHSv=Cq|%AcT)y~++|n?sACEi=T%p^KT*$4`WW9&Y``1}ND7^S(&*@# z14$^-@mOB@NSJm^9u5z0)_hJEu2@Vd)jKcGM=cK|&5qU$Epbtdv-;Fd=i;wt*x;d0 zl6FTaHs=g@HOHRRJByEyBvk)tX=6t^ATBvxK{|DiMeBd4!;A>oO!UT49UvBH#Mq*A zlW$^XTzuri`)M1P8`$fj>VLUF`R{tuiAT|?-9EeYr6BNmWCkUsDqHnj8juho3DIzM zhb&JxG)>v~TIY>O>0^SuatwBx@(zn5tw&`LdXc3w3}{Fo&^u@?R+lsZ%Q&B`xQ-8R z9elzHs;dZCg<(NpBxAb+L6p5@dF$L0V4r#e$QPs=6$ah`GNdf(_Oe1pbR7`9VV!cct#S2_bo26;3T=(`gCBF9fgh^c zcz5^G3))zR9e0F)vcgt0)SF6X^^J_-|I|UG}jUV&ZwatXlIV+FZ}w# zsvv`3%pJn3_UXfBcy#pVMqUt*Z54ztR4nnye|AL`&zDFbE_zCju=0um+U(_wH>P*> z)J)KX3YhfF`>n*p59A8J055yn2|3u;V^57Ts=K-APoy@lj!@ci?GEf)-wb*jLq48( zY2z!)*~z?g*p9WfL)S4VlAWy0sKu@_>iX!*_CvSJ`>$NK{{h-I{kaJorFePo^gVMN zmGJW3-_SEw1Q6CeH(TIUwiT4~rA_4W5N^a6CS_gH3{I4lKzWstsG^ljOusO3x^n7_ zb)Q>QjS5dRkXSL<#C399Ul{k9i~-;ANpch=Ri_59$5NCF>J9@_Ozo)W`K7#E*}k1O zZB$zx=^gZfFG(a8MZEXCL>qctca@o(u_!l^^xl5T$K<%=5-7OYVBq_FTNn7bz@50F zPFqorSA)3vGNKW=g9h|h#IG|AZ4nnLsk}48a?m_~KEmdZ#hSfCDDoNrq_s7dE5pwqSI!2By7n)0CeZ=q{BHWx4W=&GbBuEHzRqe@oK$ zF?)Zv9cn|@uo~)PqZvLYc}8>hr{$>eQPB)u&A5}hH3lF*j=cY7!+C;^;2zVp8asv< z-xyeSh`b4|byjj&+>J70(6;G}_#``gY*F>ypE^Cr(XcrJK#aI|HN|BGVt}w57RQeQ z)hJk?avWi~-BkNIV#M)-EofP`nfnr~+Rdi1bKFj^cdaZ$M0e<{OL1UiyHbPc-zd}R zetK((c>Mx8AuGSDcM=jq=&JG!T z7QWs+&u%1>e9&tYK%;NyT%K5*Xs^c{)$Jjv5UZ72xesH>c0+Ln6*vC&E`K2uPH!L| zJ_2IszUpTVe19|!?$MbfgynAeMw?wLSm>~>S#m^jKE9PqL#bQo$)=O&Dhr_o${c_H40g4lKtEs65{NM8AwYaYUY&G;!Q7@<=_n^q=zC!}2^XS@C zTn*H_VD!=dB8B0%{s(&1-`D^wg$iOhyyC@fVUsEPMpAa=Gt4zgGi}%O!r8?NJ|np5UF;Se%qRDGQ|yI>l5XM7qOGC4tH!EVF!KeKV2TgT%a#+$?niL%LYE#0PR6z`#?sexccmAU)*xcU&+=D7w#U 
z$|+t1ch(xcWon`H5}`Ma&y{-(nu{%w+ZyFQ>O)^Nt@DzjG-|-yYUHw!pNn z(JAtre%S{P8W_)&>Vy;GEGJyc<=npqMOWzIuCW7gEB713o(J-1oY|X^;P^0%dnG|n z!?z#=xmKlLoP}vT<%>6$tzLY^y{~4(+r2Uh*WdJ49rrBQwqz>}RK8;kG1jBP*3`q- zp7LZumVmnvr}$bZVO_15kfqz{@~s!3HPlxf%FWLgh=dBsdZM}6i!h5+l)6pq+n0+M zKck{zRmCACj*gbmCzc*{K0f3fdvoJcmJoS6=e*cc-JTpptTs;TBS%-S_WgeHyt|4l zPue7~asX=U`fu5dS1M_`jh>m-NA-T-^@~Rox|*Q8Unxg{s6`&YY5C-w|5mk>sc3Iy z5opLrb-!#j1qcwbH>YaYQ6KijIJY{>vht?1U52PD>OBnPB-+2F(7Iw8AnZy_8`pq6 z(8rDy^sXOG!)9}{sfn@S`J8MXBUzz_tEjXK-;nYDjLFD(qZHAQ%G5B-QC8)qn39(> zf{-MR&)mKb`}bPuO0B~8U&>ui2d~x!{Bl|LE=pKJe2WI?DVA@Ds$sQa@76eTvTT|W z4G_R*qPKqMmH-G1oYj(#8fA4D{iu^vyGM50F(-`^@sATr4Xe>LUlTs3eT_0IabDg^ z@lMyY_pv+jt3@@mIf5766rnMEg3V(hUL^GbrEuS-=cc?!?3-?g`xlmvvnH@>BTjz* z<~1|TwM57)TabQC@+ah^$IuA~XSNmSl~07*>EM#gK@X(s5bS_j7=Y{>{u0X9>c@kU5j0Z{~cI71q4Ly4CXE4PyP5 z^W_l}i<=uUDBSJRX?RIM%>=DOEsRrn**)T<&l7-NV@it^OQSs}pb1ahF3TGnV zZ>Jz=0IAI*Jbh1|>9006BP=(W$T%5i^|c74yxxaTuktme)6^s~fn37Uf#U&9ytxi; zUkllrIF>Kss#pXUoJ$~T=e7u#;cKyhHAG$Pm-K1W$|?m?7+;X-Ff*+=KxRs3V3Z9< zSH|M7N`^iZKYj#5E`kvj!8tHEy2g{QHIr3R^R7B93YWj>EenP6A_@247=mqIrVch( z*XC-h=@_pLc`ji#l#`B14~%`=FG&P~?qW!A)F4=eJR)So3)Su5W?lVx``5SncUqG5 zU{r+5!u*Eq!h8ofEhE@l&Vle%9dL4Jr93H;uWg1i4WmH4QS4FY32B@&5GdM6o@Q6r z5;mMo)K73$om%6b20n&1r?E$yq_b9EV-5Rm13_z5H3q+9V;N^7@Jxm*-955xYk|U> zet#Z!;Cb6h7Bcm=zxtU#{w4*nU?D5*ABeQkiztKTOGI}9MelT;?YnMZ_sLUmU-Tfq zI*!Ns`_xLp^W6xc=GZ*dRH7N-Hc>3U!mRPOdSS-lE<1c$)kbi6=XsOP!9s&@aqH1C z&o`$`*c)~Ea>z>IVb1B(CLu)On98^f?e|p5#K0C*Zg@3K#GL?=v@a?US0oVwAHIGR z?oYiA7XTH83ISPkNc}(9>QdW#4K3CSDhdKBLyd{I_a*saxg+Q5P_!_`^{WOR>E5 z-56Uer;U$UKv5C;v0Gf<^0HXQitEn&RAb4X&#=T9^jdNlAc#Cn^rmILyXSVES(&q# zu1gJ2^eUKD<2P`Fqcu*3ZjZuLa)<_<)*BFuAOqdaPZ#6+d|;p9_>oBj=}m;CJCVCB zV5gREycz_@=)*nX%@A^d96MLP;){oOTmjKwHj}5oj!8jIdYK9u%}ny(j%!gn2cP^m zdmgh{^>b0AL>ne)E$?rqNmB1*nh!pkH&Dt}zTTtQj!dOO9iPt?fE#@pi2ze!1D=A{W|kzYdQN z*2(yQE-? 
z3hh`f-CHdwIm1B%J4yumx}awHUd++PjeTSBwKyNB%e6wv4P;3h z5SacWCpLL}3bCE`H(cu?3!*CHAs_?w#`&(3CvIq0Ofsh-+56wtz1HO+j`+PFW5g=K zM~Bmj812F-U@$hrO@)VCz6>$!8l^;6A4f%yPDHTVDn%IOLIyyQ0I=U`?wGJKf?@a= zuit#QYgENw+ts?dKCcgNH!{XBH?i-MC2jsyZTLk$+6fogpQ5j$R0Q_Qfz#dDGXC?P z(*ETc(fz(1Bvv4xV-SxawPRm&EWIuc<^Zc=$l5U5gQaz06$GG|F4hos~o)5 zjK9_pSpyo%3rnqi*>Z}8!K`$b^oC#CZDWT00n})Ul*#N%yr*g5btc*HTJPv# zo~!MK!U(sYuEpcgOWxj`)$c_z9)plPKw*{t zP{iD!4C{uxAP%JEu{&5XRYH#hZ;?tfyj!g(!2ND};(%YQDiX^#e`l=|%nfMUcr7} zVxV`1c4VpI3dpgsE=dyZDOC+`@iuafZkHR8?v9yh2M*J>prWZK_f3lj9$+yI>&j!> zgsQ<;&D+t}&R8eA*-Co~D|oXq2P1{gF7tkrwz>a2@Vi~i z@`;>^Rc!8CWI1edORsI`6$s3%lN}y}TfO>cD#oldj-yWt2eOP;M@&$Q%y{&&d-$63 z!o8ZnrpXfaUgDqe=+Y^sa~spyWF{AIk_k}`K$F1LV*mbzGAdJQ^BuUdwjO8bNbs(6 zVR)BMCuGlg@fxm@vl-TDYe;sRgIXmd*TF{OusC{oNR7X%V|?B>W6^P?%q|shlIe!o zS>+PF+B^vmQBgE`y{8mxZD$HoD_@zZ6VbUhDy zz2*8ajFpcciX0eCcX;oe(DCO@f})0I2!_&}@>ogClHAQn9}la^g7LJYWnh=2(6#Z7 z&x7fKvk`tF85mHG-hmc0zl;O*pZ^IE0Q&mu|GWeC7svT1S{)Fjvp^#NFmRS z0FY0bvV;gEy7v~`MK zhsJF+!92zLtT+DKSk2#;=SAPfAc2)|eNrTHxx`KbI*9Ae5ET^sD3iObGPk| zZ}Gju?Q^J;lmy~zs+($g9x-GsHi8rM7|806*#O@OsB^f{Z$>e!zS=a^D6^r43=9|h z>J=?e`_M!%eGJ&3Gc>A)xUAl9L$OWsPJr1z7DK3U%laz`0>ReemZ`%EIT6CZ<|5=BhmJIhn_L6}; zqM}G{tj3vfo!X1pY9hcfpvhOOUaEuBZdxg$#QSHLGP6T6tFnB8b5vD{0;m-vWNcsi z-~W43+98bE-P-h&(+b@VSGX0UMvCxL!o)W9Z*EK|8UaIAcsR1jcS{q%)}}qnPXUcW z`%^kK9-adS3`=`<+b1K0zaCW_-hZ%=x7QlBw!m+lT4mkE2$NbzY&c6#P!y%MX+u-I zf8~IJ|J?KVjxahqe}Er%@}!3##`2ms>wJIIGrI2d$|*_T9pHGs?^w>lV=D{_YSVQO zrdtQxOY*q=LhfehWy3_KZ{}vXW2ANkpB)TLh6jL-8+x|Nj8j8fatisRaHnz4&c7x> zP7{qN-ta8^C=7b`ZCLR0(tq5oE~HM#@UiNeJkBXu-CY`9%{uT?AaRE0lPsJzPYJy& zFCKr0+gieW7Wyqc8~`d^q@IOu>+h#CW7p(&g7{g>Bi3BzCJDh`13WVHvtRG6Ez4X@ zf>&3ciR>Ic`jOs7u_>J>na3BOUw#ICYRYO+0O37h;s2ST8w9S>s4P( zF+%qKs`It~2RJ~-zf>*=<%4-V9!x%!tQXNaAQIzB;q!2>J7&8C>000000001SzjC>0@NGtE@*QR2IEg57Psa4ULxFEClT0#68=*zSUg-6V~*LH3GM(v4>rc0-3{8lWk*@x0@6{Wj2C+Bi?~Veu*NMqb2%jKp@7;GbT6=L2Y5Sv3FayEgGVl3ID2|hS+sP$iPK8i5YF4^UWQ0p$u zjBWO_An3~~O?iAaW%gOp 
zI_zP~y$4mZkWvbPJ3BZ#>J%dd_+~*a>BW_l#)e^0bzTxtMRFs?oc@P(!MY~ zO*JxcUG%zl0J9y){Aynq0wQqMLgLT@o(SZylJW~;wnD}XQ!JVk%+)1zN+|BP?A~oc zy`zb|ZL5x(F&LYJgfqA=MSo{f7>1M297u;diEebmdV{=*g}p3_TiVLhu5sRZN9;9D zO!fMag<;Pr!C?~zp}}i{g7bulsXw_XAkE$!w6trOr7cHI01oGqV1teZJw4)n%@1{} zTR+s_^ltbYzotzdJs!P8rSe2LD2i|q3qM}lIEtRK3z%A$n0__mdU5rx<&z`i0urh# zS6y-mSt=I@F|u`>I9JAdA;eL-rq`kc1#c%Z15FYLW-z0DHYPs1nP{@pBY)NaHh?i7 z6N|w|YSYl~S}vGn>)ZK4$|{;Gqtdrfjs?QnQXiZNTefzO352>0q*PviBH_7mI~xFj zKDZFy@ve@tq2dGe79y+MYBW1<9PTE3Dvb+2P z*Ew2J2VYU94&HT2i*T$1A}#2&S3P5w^$s$~QQXM9YIWvLPB)07Cp&_aJ8by}krRd+ zKs4rGyQ2W-xyMBc!R_#^R6<@T%p6Upu@sd)T4;Wa#rsMZ;u!Q)$Dox9Q7JFC)%;;B zZ?6a+a=MAgR#v(hf5Mvx42WHz5hh-j=pron%zge#oe;Gl6} z$bl_c<^fShw{=3VTOKvd22=kqTeXt};9jSxAgcdR_O>lDH%XR!jm%Z&&MN35D< zGW=k?K1JtXJw}HoFt1#JbA}Iq@?q*N(FG9KQ?LV*+k2q0#6RI{q%pc+oraL2#I6eT zWtGLAhEjcl!Ir0G+wQt*K0^Ac~R zQ56RId&}ikcrc}6-0pfJkgOtK=!u_>qTu|NW=t^#d^Er*8k1r~R*PAvZ6kqGS^hXd zi(HAA<)SGMfb?_AtsC~E-f*!Kp64?4dZs_{4~dNibs;vNP4AbhuxkBt^a1I@>@i$i zg_ek)aR^1+#dsg@zb~HLZzPYRpP2tWiBm;jJ80wsM%^DaVl`jgm8m2RgjOZ|5g=W( zvKs5PsT*UpMrI~XzyL+N`?DVLV4_#U@Moy#7!=g+hFcOZkysAmZ#n!@`2$lldm{*h!xaJQ9%98;5s@f5T01Thw3&!|IBp6BQOkExtcUqJKFZ zBwJ|XK$!AQ2Ig0$Dn*Qwk_U&82&bGHq99sNWERVX63M`HQsoLjLGEnO$Kii0e&cnx z)W5(cv&3s%qzAhRm!#vK1j;3cChn&Gm^AY}hqD}lA4xaHL;+=1RJ-oHNZ$;j;WBds zw8F|0t0@`)Vp!lxPTt;$fV!HrMixL?aa%-2!X-mY%re6j0!u1L)i4#i4YgOShzn3? 
zLEBtd%q-Ji|8jj+yCy*x5~#;3bDhxv)@1XcSU#9yUKa0T$!xIG3-Wd^52#xQ{sXlS z-chj^fjFWCP}rtN5nMj{A!vzy5DVlFE!&S9ETS%qX2yBfEPK9yqzSoU>%mPvj&+x9 z@{ajZRs;;!5cZfgPEhm~sq#^O89r!0isto^P?=l(J`uLbs@YQ1B@4hhQzqN?&_VfF zs;qG)C#Hm25gzYtpHPs3`aPlPxePw};$8HNp$F<5L1kAk#))YjiM>)1_aC41>SA@S zk6_w5wIOGT5Y3bX!0WWj)l#P?km1|$h8@Qw2FgO1kD1HXm(#O1l%od{PQp^0uL*s` zD8Lpzvh)j{)`KG|8SsxG@iiZnC@g7z z_yh2^fWYfNkO*rMIY;F_7-r}Pu9TTjSS1uHrvb@Ufm34Bs?Vt`B`G!)6-jCc;H4NM zsRHpqoT_8q+d6CQ)}25MZ(<;c;~QE~Z-Kn@Lk3s_PRb9*HcmTK2Cp{U3K z6hQ5;*)l1AY=u?a?so%@LaPoXhTQ?gqd>#58W#%XjAlv zK1GZ}DmcYit`Q)j$&<>%$7f?*h74mtyr8r;aCE}m>k^3>y$BdCqtTv3BzGJ+=;^%3 z{wRYP|8pF>{IUgaTwH>mX&jV$2=fD7DsE@Ls92`^(;O!}e*I*GIPSsUS}3LhMgPIa zPH!IjRUJ8o+S-YDzg`kKnW%U&#L9JGNB&o8{>VY0v|KBcmWo{e=HzQ%cYc!fVXxMj z*|nlr*6Y-eEijb5))%%_HqJgQNfj$3x%G^ZY=oC)-I_*xfDa64>~Cy~fv3Rljh3uq zry}xkgUxG44=}PaFLsYV1Av&0+61gv2J53L(?5)0N&~iQf#t$6nWcOvx1!vH&2g1LmvIy%fK~#(7J7q&mXrYH*jPGA!#XAW zc&zG8WRk}|w{|{GDE)th(|K4kM@#82Jf?is8T$|Z@n#+<+wI>SqP&okwP)z}=6Upb zawAD!LvCvIj=9CuHy4?;0IJlDD9&NXvk*feomyR>;sWh!+F79lq?sh|shJV<07m+v z!ME0JKh^POueJ$QLLf7y3}s*>Hqq<%2dwjF56eVm625 z(I7J0D^AH#m`~=&e4feo(7J6Q*xp`%00;bv}fiSR!IOd?d+0ivJ2O_hjW z;<5iFO?g8L?le$B&!9V!$U_G^Ck0a#LzW(^5o8*vYZa*zLHi@mUY7^U*lzX_4!W>CNh)xpuy9{Icn zbktqcYLIJRqurr+&p`lld@u}ambKwluBkO3S~s0#Wno--s8Kp__*SR@qs3{b1D{_f zn3S>3Uad3%&TkMg1=YGwG@8+iW+f;~_%@aT|IEkrBf-bF(N#!rX+Vhf5>Shs)d0FD|8PXL!F2Z+%}EfcHX&z^@XP=Y#kdEPD! 
zp-gCO$AnhKgoY|6zYpHksh8%nt1XZ#4!dJvQs8e8j%?%dNkc(2d~UvA3;zzfm@$2p z)hP>ng0{61jfs1DKfkOiQb!2_;IW?+>;HFD=&>vbj7k7T3y-ZWl07k^KkSB!4PPs> zT2y*f7XA`Z*I6L&8Lx$wt!9elt>AP7G+0y3z&I`WqA+KygQN~0LyqD8!<~A9v!UF& z>lXeZ0}ADYs1yThyEQXZVYjW#SOp%A+nCMnXKuP=Bv245)Ek5;Kib;6$?Wl6l|8H@ z;22kTCl*^DSzJMqu;4}P;nN0wS9&UibvWmGL4M9B-d`Xkc^;z!sMHLjWi=SU3Og{+awB; zgJvC(<^6{EyRD!=WAI>Y>)fIX!-!r=+M2q}Iydf$VU+YIgCh=T&ftuAuNfWPJ}$0$ z(hO+IaC@tMieMc|LME+mQl-pk6qRgu|WTvwt zn2Y>I*bkPxAR)cDWhl@*kpdv7#DFLb=LQuMuRcLFG`Me7!nHQQ=8q#-Nhv}b0I6jk z%bJopf}xc7njN7#2E=jnx9I<_0C?{sI>s#B;yGE(xZHL=jPq}wQe{l}EH`$<{*z$H zr1Qhc_Prp5=`quC+**&(m`LeDu2D|swA1fhu)Rn?9xPBxs|elmzuU@|_r*UgEK}j* zwIf4r)mfT3I4%MU1|nNcAi4kP=np`68F<&Uw!U8qHQ=?#udin_36$DJON5FL1zf?+WSM!<~|Dkz}sEtRR zZ^Y){m)D2Y-XF;#IkrW&N@-woJEL7Uw%@}ZAj!Go$ew^Kl|}@%>`=2TuxD0cp{xNa z4I<)|27n&^f9Ij?9JY*z?g^%M(jH9ooCw)}bCjET)Aeoeo_X^4?7yz~<5$I=5KK&> zO;nL^KXeqR#_E4|=#L<(RlhV2A;L7pAPfTtMfa%wL;DTX;4Vp@C?ktfwc2fh0s&@9 zlV#bZH<8IRHN6AP>=UR-2=s`Way}L}U7W1Pv#kd*zC4a+4<6&d-~=Zs77cDP2l`lN z9iGNtYxaYV#7`*j?i_B_Bi4FnvwWlL2yImyv066>YHhGP&oLL$1Md(P(78+zzPomo zaWqiS!J7E_MQ8>aNk4_UP>4&=BWFUT05OSCv7svx)vl-WGj5afRKe@)=zGhRbFOGAo zrz(CG=zz-HCdPZz+U5emKtCiX6WU@4B!O<`P^c-h2#A+m^2TqUu-}R+2VVYRrJw_)0U7r?-;^#XBI*9OHJ!=O`3N z#o_MW$})zG$1g3FSW}D0=)c-YW0N}bC$!EyU{4b@ z^?`P8@RdsK+QZr3Im|yy<;a%yRZD=QS$Q-Q^g*E(kno<}xC#_+qJu-WJP5>`lk|`9 zbBS`?8UwO%K7+lGp@$H}R=ha%0f9!|I5)WNHVKf!b9(beO45yVT6kU)t;d}%?o;ou zImiKF_#Uh-_<;Qaub&MPw(j)y$tzaC+q_E)5#&(T4wEA%|FJSzu3st9gOlVQa(qBV zx0?8Frn2~MWY!|-%Z;)!vQp$xENL4+L6y3@f8nuS20*+I`#AKT)8@KUM>WY0f2n36%JO$684Hdnj%ptL7^?6M z%k7eYlr{ zQ}l9#MN~)xNap!ZH2h7 zUHXzG;FC!LMv*TN8HiXT7sk^r?(<829KIgh+ zfdDolnC`t8pfyY|=Gc}xufk@wC$y+qR47?MN+e@+4X?b>-lU2TZJ&Iz}}e%?!xUF}+ZL6nOWIIpG)+{la5@x}EMe zkT`%DDQG!>5~evR#IN0O|2#&XulP`SY=co9${qymD3{) zET4k{J)z1PBf@;-GoV=HhEHm>;4hE7L{CyA6#UrW^X*fSD8hDTw`FVdxy-`zug$^* z6VS=M%mhY~mYZocBN^tj8=4VZBpJh$IGH&`fIYsPB5b4ZeDnj1U z>||MqL)ovyKi$SZuoz7E2!>DRYq)vhXX1T+D4t3?1A|PQ@YlSkP!+)T4{!pXN}2ve 
z70|l=I;CBz#~<3Nx2l=PtpJDCc%I!^5SV+GWyN}S#=}pKgmPxLNP~G{;wJ{qgC+@YVB77j0dw%73Cc6g4 zvekGsP}URR1C##*Pz=i6HZSR~>2{vXqnxFH3{m2HVcLt0VA}((MB4qJLy}}U)mLv3 zi`vO|9DKvi(?Ho|i>;MHV_Y{DGZ}Le@y?#u6C7gA5TAD!>{OQkB5V*`hvy*>ai4^W z18vbZg=hw=eDXhNht|NQSn>JY_0o`4AR$E13N2>UmL)4$N#riebSD-R_7j~w3m}o# z2n`F&X^9T^@rkHY&+{)T{~~M^)V{JMz;jYup8r7Q9UP=7exM?;VK8=~B#E1@?Pj5N z>2ID7jfbLXE-#T7J2hWf@|hYU#*j8tFkA}c)hbqnN|^`TTUj;8H6U?42B%1=jR-g{ z%?LP&Nwq8pLMP7`Euq@Jc$j>HDUlzd14ODh?Q1_a=zBa&{Qw9rhuQCMy)$|B*8BHN7#FV`N z_`OT~G`&D+6=+Ogs^>qu+z#pIa1ZrPEav*4AWLi7oJ8m_f%;sJg$P7|#f*+cu1k(z zWRY&FjlSAkDh08F5po;(T!a(QjgJP(M>3JpO}Wz&Xxp#n!Iuf6LXy(R#b}wsalF`R zl|XPgLO_*4`Pd{PQIIuYGWuS3ls~7*BaGCpKo$G}R3v!hM8{1KE_+bZ>;nGGF?)M2 zFft7IF)@+g&|d3R%kO}iv# zD)OOzHl%)q7B}Z_BXTl$@lULsBEXdAPIjAShIQ;L&mbx6%lK^*GjnQgGc58EhKF59 z@UxfZQS#U?ThYh<@_n*d!3Lzd0pML0CPp0-5;SJ*t`4Nc^iPRH{2T*St#_BvYgMjB zrHR~`OqhI+x2}x@Mfp$n%4ES!(o4J1?v{iCQt+G!jAd45-VXQT11m z{&=BwBTPz5VMMTC7QWb)GED*;Z~LG6@1W}t(JBDH(u;GVsyW%RaOPB3D_c<4R5Bda z;YQV5U&%#1pZ5AwNv8D0-E`b6sJ<}bj|fq}B<6~rq^|iK^!F>UhIRJvN?u~gE7F2W z=TO>9!AaS>)f~#2nTiiO4Sfr(&w#4gT=+ar6!qBOjT9*Y_D@_aGiF|K`=E4xUT7pE zGHjYY6`&NX8TSPNvb{R!%mJK_6>xbr%~|1qzqbZA!Z?oj~&&=#nwGUADK{`K|o31W43>?_z6-wFE(-E=YHD zd3aqxZ~b?a2lc+ijh1ZuLj5uN!s+bnItP4vg(nvk`hT1?m0;__8+V_DrcByZ zJ!BbjFy}VX`Jfth4_qV)hP@s97vuR8vj>{VN(Ad%qwG)g0GiYxTge?%Rlc1EVz7uZ zUpgw}CDwz>0>|t;CD8*trHfn@Yx%Ovm~L)?(1{316q8^gBl`b2QdlRTPr>|7yuFhB zmy9l3N+XxMr{0~>k#u#;Lr0#0?9wCu$EsXKbq-HP$jUxNtITCM?QgD#LDV{)O5^+j zT63s)PpfwugxmcB*3z{d;_?cuXnJ#@0avuN>k+z7cZ>9rzei8Lg%3L*;!%vcUnT(p zoTihrVh6AbZ=#&!<47euAUs%lcqLSDO{LqyvvV6zN>-snmJnLPbQA46cq)dXHAS<7 z!Z)Ctb1NBe@Z)le%|VPikTDO~L1vI9p3sInW#Qnwf-bIl*5uz)-qYQRz`AL|Mx$c+ zDQC((hmq5nEwFnian0+7;sO4DD8Ga0=7j*{+Od_ID~S4ASVF&7YDmifFfp-Rk1m#= zr;BsqcbC;+VZLW#1(jQ1G0^d8%U5p`pCX`cb$m&bcLD4L8m4jmA9yNXdqF>=VLYLJ zgO%dHV%L@S+@yzeL0kk3zJV|6ONR^HfOq{>NYQ-0qJ{HhA$-9(Su^CisgsucO+>1% z_qFNY3a1JqFlJ=`*4Y>5O)qR0W~>V>excjTCvMVWd8Ad@&vG?-X3h5;vxdG8-s{qNafus+B#&4%`q$}20I?g&u 
zNzbp@$a!8)moBbCM1ilUBDesWabJsQwd%wqvexGz?b~q(U#pPqyfJf%UW{iITw5dc z?$>k~$Jn5Prqrcu8jPoN~6$U^{vb<_e z{bByl3JMY+g^j$P!r&nD>}~RuKdw5I83pbqajS!dU)h#L?Svv&Q-onr96>B1XE3eN z#~gr#F9yxV5#VJr5wN|gg_c0@?_V3F5T)dnh#XG?8>k2>%QS*}2)mNO)HAF)N@*?a)gbU!O~$+{Q_+I8NmM zh833THqhrW6-$MD^!fbEFOeap@(}@49Rk$6DnhW$x)W%KOQH)91yi7*cXKq4z6!6Y zbvhtcPNekI+ZhIoF*Y+w0cOcr&}XFbYkKakP_hfBjieA6$+7Gn!{a8^wgzf}E&gRc z_P6|$(4-#T21n5nR3*&do%ZgXwgKTK7QbXw9mV7*kYq^K5u0qQdq3nTZYl_lZnaCo7ah%YmuN zMhG?UA=65|2;}u4c*aAUJbW2%LyAqUJQ#UtcT5!E14fJoj6FunqMYr2eGPA+$X(l}8~kX0$k1DpXBwxxrxa z_X*B+O~(u~oHq@AwQ(J$xPv*jx~#}x>uM`}@K{9dAz*LD=HpHyP>38p@+-)_sFexA2D>E zqC@vF>FJASz1J_>p5a}&9bgx??KkLAE9A~d^AJ#X@L+SL4SUP;GOGVyZ|2&%0X~># zJ(w!}3*-a(7y#c1Ka^@tp2`;k63r<6s7xc%SoOjj8-3N7#{4rDsTk1Jt6Z~p6GHjV z;+Y1>Qm$yRzQYLf_OAK%zhqD7 z0`WusF0?1{H!ouKxxr;GE^rMkEfnmrroB5*09BJi!{>=>dK2^e_q&I!Nu(9)xJei zrchswJ$f>m(Vz2gkpn7898qs7TLg+=?@U^k`BLH2rUxpjm=FD=5a{KEJmSJN`nt^hs!c)Xag@M6GwwbI8)2W)3E&o6~eT~}708+JN|HLFo=|n1dnNR4Q5m+bRiTRDCiJZpAzTESL(0yLbnl15#L_%`Mnz=Uru*N;bH*YWD z@=R7*BZ9au#P^Q^uWCO6m~1o>P5oh~WO8APAbedhw!U_L5G0B4?%x$r3kguhLKq6t zKuU3~vqBJ8MVbdW8U1BoQ2l`0A4{MkU83pnpY?|Mi74pdXdsV8*>(>6G`=?*{A?mH z#n}uTrZb`LW6Q`a0e`x3U&-B~lc2)*Mb;RgZ5Hsy1H zOBm;k#2P?_J-;{~H=Ujo_%I=QvaHC?9#E+^R2+tql2;jdMo8XmGmPb6T9jD3^!L#4 zG@FWDte<-~&p{U2;E?~q!cnKF(93H;k z5J0m@l$d&ql`c{?bO$BGqQry^U$?;&s)bS14{EUIHkjNN;fOrB%xmXk!DmZLDP|1CCA z+Bx*~md+Q~fc&9q0z@r2_!ou<{oJS$I4dTT(-UMAbwcD1ySbM=fCFb(JW#2+Pe7oo zh9fcMmzA3&Cbg z0&=-$-dfb4Yu@f~3(g6t-AL(3gbMO68{#55s8b+HsixeR7ReX@F?FLKE8sZcnyi#< zllA{dCwtdA*?b|?OVpFl7^yo+c`I|Am?V{pp`(h==o-&AYU=7sx}Y979k)U1o|eVS zIBU1uo0ZU2jEUA^AFcH4>S3u5h!oOXbtQf$T3qF0XW0Sh9i^QWF%?ChvZL#KikTCu zZ!JU=$xYS5*Em!rl1_TA(|h%kwz%K`nuP?j2rc3WmYdNk!Bmb-V!%#j(PF(2qBOUt zZJD7F5|3@qDwM9FbKXaiafFh^otA#WuI**n54%T?|9frs<_J*m@0Ivzp28`IA&#H4 z`fagK=v7~1z2D_`VBze_ZYq2L`?uoRDXhpUUQXTPQZVn~)D?dqOlg+rW3;qLxRrD6oiJ)>_@0Pw1h|B!_%7Q04*Z_~<(Fj-egv$Mab3h9r0!=0jLECjN z!~;C^I96AJEb!kdPukE1uHpk{$5&@)T6hPzF^!qe2wbkKyH@bLMMHoOHG&QGm$Z~1 
zn1+Gp{Vq*T6c&V=h(|YNDEern|D>wPrUA7F`og753{aVpC!|ZH_vi8BJf2go&!Ep} z7m4aW9(oE$EoI9aoL^abT7+A5-Phdz->yt4lT{1Gb4){^D!i(m z*!5Cf_~2}Phn~Owd25Z?D2AjBsS|3TGd2d6inM!=&|JEdn*< zeBD7qP<{}_t}LQr8+fbsmmahVf_M@7d3FOk7K0jBi~|#xx^zf5Qty^Bc-8LhR9-k0 z3@>~kGEn-X)~K`PrgpsSxcPne*5p3+jqVDL)7Dl1^OYG9{rKo}kGpA)8{?hu+N*g+ zkr-hgLfNi-caYT8Xe}hvOm8$#NLbqkf$TdLjD3Sbm++M%qH*O44oltYU&JJ>kAl

O->J<9MM#7;I>G9#;El)vUmWv_#{(`5HeXA@g7 zSr{loqATnfSAdDebmo7-(XtrH9;ZI*%|VLK0U@H{8P7OwRxL59$=zHZ3drCvsBq|8w2jrc9% zoa48qGQRC&G?m+#VU)b3#Ga@YWg$i*dKIYsRga>|!;k-d<*PLofTi2c-$@q|W4a!g=A8l;q~jA`;Vl`ieQ8v7ET@Bs%!_|B34pZK)9_s+yz1r5 zdxvs!``>b1JiQ|dAA1ixaznAQr^xLcvlx&(Yr@$Sga|XA;lQ#j0Hqgpyp}@bL@X14 zhDQclfxyNK4hitjihC{r{-mltHagEIgjitmSKa&uvw#($26X+y!?&sUc}>A^w^FT^ zoh^mjzaD}qX9GhYkWqGi`K{QiMvaF^#qkdk_ljLJjbTZMN^cQ2FiC%d1r~Lptgp(< zKOje+3!i!++{d5Dv9qge@?o)ILGN)P{mX+nNRsQJbnD4(PaeLZ=dcU3meds z@>TxCus3|h#mLdT#&s`+aMXsY0iqvmIQ5Xb&{Lqb|3JK_^e$Z>%+nf1sn?OwK((BT zZ<2TKsYrEyWz`)SsOa(1Sj2sF0WmjL2(=U9*rXyNw6+}I#Vd$|qJceL{{P8>yg-(8 zAFO8PXHXAVyIF-gSq_eWW$#Idjrofm7l1P3i1PCq$$Kt|^U&OoXC{qKqM=N@aMuf` zo?tQ%$WRbXs6fO%q&VRc@T)Jr6>y9-sSD;yCKBIPaqo$<>M<$f%2EfCkC8 zbnJlEub1jk=yXgD2d4F*B=;I16@`>zlg}|3c0H(<+5AQfPD5WGWxdp=d-UlDIZvjf z8i&%_uZwQ)JDa444Prt_f*d-y9f?{}PzJ4^buxz8}RiWVUlUnJb>uZ@S}nDJ?*4OH0O zu^`_f9!4B9r=4FyVwy=o^Sq2C?F|}Eh~ux=K)-0f@3?JjL>XsKszlVaop{6=UP9~n zvkbc-Qs5qR4x`2wnm!k)8K4G#w{Z?ivEPD_o|t%V!Sn`L9pLwE@k~IHDph>mTHjYGfNb*kJq?&{-NqQINx#@brS5 z?7~Vqg}N1Q`2^3oKsU-k!*l%S8s*PE)NT-&f?ZrPLK@o>u;T3F=*&3ifr7D|#(hq{ z&A8qh!_5?_`T24|iK!rX%9d6=GNt80h|?F@cVSmWk;xBfrvTkpp@{G}1$bKrC*z@e z0_k1EUh7TVITRekjyY#zV{}OW@kINh*i$eHTU(K-IKXPT(UaH@#<3mk+1w3f6;Z)S zpq^{w4&YOr&j7CsRK;aC90w)i2#rc)p`)esSiyU*dp{a`v|Lr0Gy%?mg|Q0i>Lte- zrQ?Qm@UC)`KW|yH!gv`1u!+Cv3>rO5eru!5W3hU3f1&bpq6-P3a zDeg@|{;7I;3b)XP7aD1Vc?96B8WCN}rh5y)` zvuGf5uX@1=wpHE79)*MF?vzADoH$-vTB&T%^X`WTjS)cVs&L7Zui^DxoV5Vc#-Q_w zM>nS3fN^uEC2;n(u6Svxc}C4Q|NG7pSROoYp05VQT3hcXZJ4D1LJSnt7vY3*uGRmE zcZOpT^3mr1x^&Df;j<3x>H2c`1%sJ5ytTY7Yslt89oq%A~aUG{E zNdhoOXB$hRbqNV1`9v6Un5lNoG?4>f$7vdOH3d$khNXbrt=|ZdEyDYACxNL_B}o7o zOzgKp0()WU<`lqw>X&i9)IN!>?P4Qq8eMe;$&kI3`NT;fy=!@D&Ob?ph$29oK`1md-LP`D zc@r>N6Xcc0jWm#fZZ>v+d>t}-V{K3cmo`y`ZZu&v4B{}n1_)bvjbcv*luImatevgb zm7?7S;vEpJ8G0>`V9ub8mS%}vRZ4&f!jSreP()SqJ{l%HO6;`yR?uH%&=v!Wu4r#? 
zSE(RP-)|)1h7dLe1xcz#Yp@F9#5y}ca|C%Y6Wn$90F)Tg*dx456~11L4KlRr)#Y_1 zm-%dOt#~}FdSef(BQit4kbvC4n5w%!L6Qqy@47*)<>;z)(3wcGnFJvzXwmLlZ%{+= zruXpmryngUVmH9)xFR0z#)}C{08U{6N&5gO#6=eF(pYD{r5c42Dd2!fmn>(@ES1#? zK*mNIMKm=c>C}sZBs)$6YD6VPh1dA*yfw$g#$niZx;EZ6q_2I{8(xQr2C$wi51sTu z&QECDC#tiXJVNg|S%@IMbr7@=uwPh?>uFF%ZWQ}4{X%kBlXA%t^ah5fnvHkR1LwVfS0liRErk>? zY?CSpMJk@=zHv!_2#6!)faDTU3}%6HfbV8c$1F!VILFtW4^9zlz~9@TJMkawQHg|x ztu3P0LvRGw8Z3xJnreAE)gD>5yxX$jr^0u<4C`odNzm4-5bFFVgC?-QF_)}JStVQS zbb_^VbpEh)Mbi%7a+0LUupmyRaDukxba z7m~jZ71Og_s#Z=g(6!poK=~{-dJ$5LQk}b8bAj2)+-n5(XiKH7dX;f+Ai$*)FFPJX zNx=Dj>SQruf^qQ^X~b?Jk%I~TZv|>dp1du9F+F>P^yR<9DcwxxhSX}KFJ!zksjWRd8;dixY-9P0pk-Oeogj44RdubO;%gPetD9(a&(6s$k;uu z@84^-^YzfhKUP(bP>?D>68=t@RaDKqpJ^sWC^=w)6h=AUR^^l3H< zATl-JynoZ^vk;RQJp0keGr*UI4gVP`-c=*4TkEQ~1Zc$)%ay2ymetfr2DnF~rM=@K zp^T+i*y3~s6-)8RT~&4+3Y8ge2v_NuK=J1RcY+~LYLFBgIeOJSH4UYpfz3!Ea*zjE z5pJ$%I}F)~_DZmHh}T0*J3Q3P2D|T8gE#(?T>vUg$5zymT1Ojy$o^)Y>`p~ANkme8 z!s5;DzjH%5s-&hI7cXL_w5yB&w&tj*$P+6M_r((9-SMZ>kOk)U%agA) z9u-XN^6&u-i(CPbFT#5|;KM-m(21$M6Yi``>tku~&QGWXdvrGK5S@sc331>yVbUVW zf|^M8h+=xE_jkin!*0n`?2j{LQ+07)+`0Si$T|b3W*(_r!{Uw8-Mdc*?a%Dg_{|>; z8{K;m*~3!vrL2JP*a2~jL>(042e-g}F8kO;0Dt5Ec&R0|UBKGHIx>~B1z;fi_dURv z=VOI+oGg$-dXpi9pakzLlw;H8qMcw+o`g8DqqRe6|5vj45q^o141I3RlqMgmk-R|P;jFLJHsh_$BpmG{L! 
z97fp@^G0D?a}O81WjLy>XbquYJ9sXJ}zAARkWj^dFdXK zpc)MtAiu*v0N_XW+R4Nc`C24G0J~3ytU-gHl|dz90I**o9f{#2BjPhKM8mUBO4%%o zvXmHu<495kqf%JS{0GI!h4#}lOH8ziqpT8Wz2)rBSTzQfzrALWgQDBAk~#}s6sGN~ z3yRZiU!oM@1_33_LS5L%>#*ouW#l$9o6JBK` zEe+nZ2|?JG=_X#Xm)}Y700`gml8Hty7!aFgTR;d4#{@`s*IZOx%&4`z^LMgJ{lPYl zoh3D8GQvXX?G56{(;gpv9rmHg2p3aSiQa41s^hX%3By^I1^DOsJ{^4gDI*x$3rur> z^noch4u0tueb<8<{6%LH)?UVU>-0^B&l88*5IVTCG(l4=uO3VhECWu_>MakE&$ws` zUQrtYZt*&&t&_xY%N@Yc<<2PrQ>$JY*WkEdE%Sfbl!1G+z-Pd898o-@qIK zy7!$ZJ=KnJeh{Am+B%;sRq?DVK*w{LtW7;Qq@ms4Glk%^jDV%#C(c&e4n~j658whW z#-1mnu7c6IB?*N*!-}apZ?fJ1k;Pc@@xH1wTqKB#t>cTxDDM_=)nBle_~7zG4_O8`R(G{Cn8qlC_Kd6hATK*AYv9RC4k6A6-$HnF?kWn6 z2^o1CBLpBc%<&7sgG?x^>%(b{pRVMee*wY1Rju!OU$s4Tn;OOKPPio8^>?-vk?Z&< z+RQ%TD&CO1Du=NZ`E5c|usmrKMy$lFLc<1g{|~#8=C!mJ%{K?>N#fAFtnXO?2_ZsQ zs?H_bvvQ4-F+OZ~4TBpbCA&dsZ9R6{EA7 z#&ICU%ohM+K>+@VBvB)_4eezc`PES^7)4U%^dY03NsrS1VFHGUU;nXMv+jzko&r{y zD~@L6jMvaDL|g#0#ShTuJCrn0nW)xOgrx|#<#09w*eJ$R>dfp(VDw2=ynuK^CQLV` z_yIlP1<4iodBm@tqNJ1qkZ~czl-6Gt&QX1pVJ8h^<36AsyUOD`6YK-(RKk#BDxD_n zJ2fl(MP3*T+~z}CH*D#)vXD|hzWbk;`&Y&51!@X*S`t5kZb4iwm6R3a7a;3ETSnD^ z!RI^oBYt_KMN*DwuOoRO{MfXr?V{)EN_56IbEukqmAO6xak@!}Smb;g^`)Mq&Stqd zz8{a>kvwj0EbK`C5NTqrA8AE)DFB>6W4{g%NpG^JO-;08q9K@aw+8@!8%qpn?V7i8 zvj63qaDMp(g0f)wEqbP)3bQGG7&Zh>j3qUAM3Ty8Acy!2DUjRml4^w_`nv632ckan)>@xN6)2RB<*0ipB^Pl zEfC0WA}QU`J-w&lq);N-jSg=01SH4Pb>LQh*rQA_#Vfi~tN6%aR|T7*+|saaCJ;GT zLt?Dvm%m3MqQyOKyv-cPOtNa48<+SSa*QG#6T3miuPtYPbvh3h*sXrIqiiM+0!Rd+ zOBO2hKIsh*jyF6f7f%>G4fE#OSPH0hcV77KphFFXW4EUh*{p`;bKPTxT}C_+P| zz;H45YFuHfqeS^mk}jtj=0Nr!AA*_kVz_Yfup2#GHG3N1Ukh3v@x{h@fotJR^zwR^ z5&VQ~TwH0x)P*w@?Sx?u?4*sj8>bWSl{E5~3WX(T#(6_N%>AdD7LKGo*;X3~ZxgDT zsO5an$4X#>Z?daXOt%f}MOZk(Eo19Tch?sUu6wU~FR8}!?BF#fX*9!2N=8#-4UQz^ z%VHuIw?z_)Fg7Ln=$u#`(O3n6M`+&+c*Oep8_BWf)nnUuaG!)mgKuA)6TeWnQII{N-mwEvLt8lg<9(B=0ZbVHp65IL$hCuBXPU`2DujZm3xo(^ncxf4LcF^5Pi>Nk~8UOw;{FURXVIt@iTK4g(i=ll`2M75U^g(k!E^s zB~RKhS*@9b`QuWpoS?rAW{x1wrpV+=_birLgslQlZn~er-f1~L{BkQ?GIOhZk?|zZ 
z;(9!$O28u&G8J`fa4t|JZzWRyH*`FVK&gbIRWrm_1;P~3Gq;J*+@i=GN`>Rh87`)y zU>kRE63|JCmED2r-Jt(DO{ggD{Ai1q<;4x|RxH^CJT^@Grdfz#2X>tO zeDV|ga(xa!c@9OFZY6fJw*$sA$SN%&Q{732`l$+|UgwZWiEfuu9pYUXoK)Yat^Lwg zi)gZ`HzduuV+Eh2%!(6d29SC=F|9khTL*O^Y^5A)3`}n&Em64F#!{1h?=IQ^kpi-z zReWMjFn$7aVbKqv%siwe(u9umTaeYPq{77E)&`UXd~PUAd9hm*inZrK>VwQj`b1Ga zqfS(1PPa}aQIY-}h^xjMG+M+Dl^unt`Gqew`6W0ZqZq`nh)ehuq)1Oyz&N3&o2SF= zHuyP(^ogm|wYcxKh^f3?s-++LYvh0c{vZ!h3b^xsNOnx&B%_#5Z;N*Rs*YG@;Pc`%D=;Qh-Pt zcWXD=UipdBec$fOUk=ZLA2rzxA{{b6vn zr){2fPwsK_pbWTXOFhrv(f=Esm!7O5FUkVF-8G1_JLrjD0j^^KpqCOlieNZk5M+o` zbD+;u-L$0s2|vcYbcR(YP?lTK-Tg3y)P}%4Smt-2p9XG6o@c<)NiE=%*{5Ptpja3& z=y)4%0k>UkMV?e`=KQf#Whhp0)+CjvNslrQCsF4W<0tBAn%3Tq5x$0@_^|AmGL^_O z{DbMYv0}<7Ev2*ZW_u#gW5}ROyr>C_(wFjx&TUGz(hG%D?U;qYqb#{9oh>d9k6>xQ z1|KwfR(x=NLl~M*5C8;0?`J~djH+IBUL@4D2RgV5*lgqkiH{a*jH4sj(AMTZO|=FP z|Kwn2?=7-7Uz)olA%O{Og-Om8w@3XbNF`rEG#|p+uokO%(QvKOSt#n{{d&-K5XVr< z%hpRzGrU7MU<3n+5T5tWcGl%h;X%u=64VC8ruurocT+0~V6q3^w7Wh4q_X-P_aT9t zg|mYx)Oft}e;#|uH{U!p;_@d3c1A>?E9DfZ&GjfJ*f$tj2`291N#iwVo8&KU$m$X6 zwSw=AJ!fYpjDcFG#0V^i8D33x>w+zzioH`NEP-H>+5r#oCmagC12&{v&We>9Ya)Dw#0Kunf?6;N~O2I{`&0+)^pV||hDy;On=j-G~iE$Lz z*qCGs3z>biJt2^3sDJv!IEuziY-ZI&3v}|O#}@Wh!-r;PNW6BV*SSMO!+krJ&i6Tq z0;Z?JySx3!V_BXs)BOqkd<}(`Zhq1JJKmcIEHe`}g4=|?^1x@Ngh2FR?$vGUN_A^Mp7HU25dtz{i_rX8gQx7T zqRDMBc7-!r1U1p-_A_(wQHj&@EW&6r+W57()HHL{6a4m0yizuF#sw<*5)75GzCVca z2wcWrBF2sIWyj6x-7+`ESMw=qCSgyYT){Bybq1G->toEOq2g6P&DUs_1@`h6G|72e=HAMa9xgeUy7h7eF2`L&sqM&SbC z0NI2#n5^Zrvy4uMPc!X7R2dS`4&qhz@XmCBWez#-WlXs#c_cOl;?su$4SH6g660A0 zhNT`PT`?3D8}U-6hP{sj#i6a9A#>Y8w-trw1E_9_&k0~pyD%|6vKt1BHUhgqpwf-6 zN_?msG^VQl>pY?^)N{nJyIc#C*At4<`t*DDXGDLxpWi2dVrc#C5Rj)*spMJ4>8(9V z==l)-FH8d%=}riR?idpEb~+h#u8qPyN=^7Ff<6r*(Jzi<+zYg;a-pchcKqpQ7_P-l zs?4wf&sgp&igln^RPT}sSx&acJe(|1AXDH1y9ZTQPS>?MZ;~RucCoBag;Y+=F$riJ zHu8g5zZB2^e=+q)uvAAOo{ z&b+WE@X+{1kpAIJCq80+Om4^HNP|DLe)coz`gzFlqUp+QG$)(P+cD8&u{tdGi6~qU z6d_15{yS(C!JOY_zS{}u29R*jZ^OrG#S!T*Sj5Au>s9d*2FS@!zpIB#CRG3ALwxRd 
zCs2nb@y~F{6)i6GV=y#t;6mJEc|tnoYbDsXUGhP;2=?BgPSD`xBS9w0ERshvuR|Ii ziy+v7r$kgT|AKYZ5nSEiA)>O|kADZJ{`26Ncm%cFZtEnngDQ+BM#g+A-{~1bmtQ2~ zVa+~~Ix%D0>sz6^;5yTb^e9Jdb^s21zxcxB0}XN>AWBLS(~)k}#3lXe+YxF0n(K3Y zR{AcjsupE|TLhwZ#ht0rqUPY!R|1y!$w=pqW_b8E4AJ1Bs2-U;B6vQqdje|#XcBi%6c4jp4_@!>p6-k18LF|Bou>3)4bKasoca%25Rrxx4|vMk;C~VU-#Zj327;)= zC}7?St^$y_gS~}Yy7{y8ne16kRuRD0us7NxKYtrrd^c1qTp<;V*)6%L35>d3UpG9x zkCp8>U*JHhTqM=QX0UKvW)dXaUeAFF-*!j`a-yq5IDS>xjX(@;MyO&760E8$B(%kB zy7pxiS%bIKqRYnXOetjPn){o-ZxYFh1~GKh9B)t;;|(~)tOZxtILuez0wKyM|Hh=fLmiSZD;O&!tQ2h9FJHY#z!|! zp`E%@vwWI0Kf&k1exqTXlMb$~n2%&Tf^6p9@AC*Z1idoCG^G&^0TZJo+@oy?vv?6f z>Up$%X;#E5?FEwhryVmh|B!v&{K2y!y|Em#_@=NGJ8mDo4nPDW7ib~ zuoilYpSNNFYuJx z<(a(iY1**qm^{kjQqK0e11O$}@O5zrQI2YP09yAd*YCp^id8Ee*ys1tPEu z&=VkzeG81oyLZp%K}@m3Pm+tY{fD~%+Q&^j2W`lHiLgmg1(Q zi3s9EsLmU*j^42<7pUplqy~=3*gCwp9AG%?fac}Of_SJJrSoj;X?yPNy5t_i&u)lb zgvfT3v?YpFo&wH|budA0*LEfP3Z7tCI&(J|6l=RTOJ~SAcNu-*$)jGSqtTl@vk+O) z4u%o)`~HUq$htZsUWacor-Njcw~2h}u^|IeUTi5-0<)+p!{$@Dzc%mH+XL#R(wqf4yC1TWDRO#_lJD5=_wx1AwH%*t9@^~pQw1l! 
z)TkcxidEC_TqE6~vWwKTUVZ?_|BR;V9X%JlxnC5Wn4s~um5AVlC_e)3rv5!WgnfYi zDmgiXM}Bj8{Jm|vY_)jh(F4jA`UFVXE<2%b&MEhXpPU9IbCH<6b?j6W3z!UEVI$G2 z9{z_SPOCzU@4@w6)S2rHYJH{irf=a30t=w{tIG~>E`TuHIeyG}+K#S{>$u@tIS>O= zj3-tpGeu8=B3K|dYj9BPZ3NpLV|sqpfrPmt0vN3&&)YNv&1fYLaC;8p>1IBXJpj#f zU=+qILUIFz^Bgd67!s0f0P*xT7KYi7xB-Z#W>molELk3)0Z--_EHK3+5x`$R;xQJC z(NVZB6!(&vEDu9AW$4%R2ZhFes9c%2;J@lA5>knl!{{ht8)q-G2_~f>Mmhj8Q}?e zBJ;4@cP-%49mXE8X9la^B0z%TKDUETqnW5q&M$w2d4jq4p?boN$OUb;X|qD!G3!2p z$DcAziNlCvMR8V9U(cu{o>#*fV7$>4H>qx>Nfiw%dV_NzEqDCmSei(LCGnzY4~}%K zsJ)8Pv~XCmpJ69IhJsY+;~@WKR>2J_T}`UcT6E2P7{IB^!9D5m5(^R|i%siNfNU64 zd6hw$UTy+DN#O?wcYsYM0Z#}qAlWpx&x|tkLoxx@%HsrWXz-UXP_&g73bf{tEa8{Z z-51QA$1x=g7E@+!qd~DLp!*#bM1X4$=95F>TMDeZI;`D?hRlG66fpI+%>o<-f7Q24 zS${X;Iz5k2qm-LLO!_Xu>OM=a()F_rhf;rNTWuNADu;>mWZ$1VFR)__TH*yroDKA% zioJja(*ht33IX_z1pvMQ0Dugpl%|~Z%=A=f;tN;JOr>@di zOQBgp4gLw)`P~cY+Fz7hiQ^;Vo60Jz;k$b-m&P)bUmDP;=>xQkP z0f9?gMm!7(LTF@{o_2<(XHJ<7A2&4+!GV?qb{B?h3G=bNs*xeh_X-quxza3@qBW$e zAD^%h*qbx+KqQ{(9>jZ6h=54u24QyoQGob}OX5OKFZr$)jx~beQ|3)5Okpn)g#yT! z;FMW6>$JQk3&ROC#BG=IyIR5s!o^Qi z*aKj#V}_n8|jts;%VoMrZAS#D@2CrpC<$&YgC+xpmS~YSbd0sTCiNhTwCc?>8MC!oGJf( z%L0TFE!H)s4~q*nGI%4;X@mrpZxMbODmtu*u|9oW84n%abJ}^M1ipbI{U_rmWbB#9R_|1WNPIW@aaWV+Qc~#{lVh) zJPN4E($mI?dj(&FZ>wC#aR3FY*$%UmUb(>3y_l+i3v8Rz9!MPB0$e^0Oioecsvt9zvU+e)s)epiuY=Kt+jx1DK^^(vOzNj9U>hVw6RAwo4=3C7%?7ixzPR@ za2Xm{{=NeNLn*NBaClR&`9H?T4xq*13S~`YZ;S_uqIpVhgupHqwaiG3ck(xwXWg4O z=PN9i7i$jfB6Tk+w^RvW)o9fF0{gqOiOk2owt-k(h4O)LP5`uNP?1B7xHFE+hzkGL zZV{!yDX827P5!)6a$4rN0E4b{KX4rOa-{gC9`fxh30yI6-Bw@{%_Vj>s~s+P@rK{p z7SvVuC`B5QuWm1QlIaBaX%7EY-l$>F+9jleJ0fe(kPR9c(U`$bHXa*T_C=}^XB2$@ zfKYNU?1KVmc?`Aj+kn;>g89$&Qh1OKy3O0FC4NS1=5-czbn~l${%WJQ;j*n-fXQ4! 
z2MA0u*4{L?Ze9s9#E3IPja#BTEf87n9W~!4UK>c>p`zBgHl!L^UFilP$Scyho$}q{ zpwqf-NmF#+*7SXa`W~EGN~am_;@?~yyx^mYdhkL~w=ykRkzZGSwrT>hv6DiKixE1G zWZ8Wq!%mjK;0lFH%~G5jrJ&9A<6!K<&n<6wHt@#v5y}dUig(jW)axu5&eg*QhLM_N z#sWZ(ih%_E(jGO&Ay+SK4iET^svv>`(SwBm7o1De*ESa$z+#bXf6tO%YzccbW-Vaqb3Z7hdDRK(0&pfK(V_^n9KL<5RVD9{raa)<|5+&Dg zw}ba_)El^ey@=uuR3s0m8Qs7MdA>X<0gyRL8U0`x!_AL^5$S@s7|vhoCLXZPxE#Sy zjfqIYI;MKmJcS*@upRd!XB79=7P@Uc?n#v$?0AWB#f9k3*ZS}Ke9DL*4!y_uk31`+ zR)kiJS4FN5$`w31M|&fGq*VTi0Cl3RRIA_m!uP+yGVJqLdQ)Tg1{_%B~k?TM!?? z{*TXK*%tfJKyc(C4D^XG92qkDAn*3^4)~)U+l@Th?S0@w{cHz$Ys1l}C!8VA1keyg ztBJ8k3~e&B-;*Jp#@RB?#xp}t8ro!Nzb8XHjk9H(9V=t{rYlb^TKFZ^g#u`(5k z3^gl!jS5z#LXn|ng|AV;%2X&a)U5C|Dp;8cMTVLczD5NrQ=!PP$@c7}md2lTV+Y-o z0nT>#vu^A_w^s z`@nf}(7MP8BBY$+i{ScJ#(NhTePXx2dD@VJEqK_U7O1UF4&r+r(wl7a3Ai!P74;Zb zK&H0*Xt)COn|0vcGr#8Z#khwvzvAb|1gH9B$=sNOw*72D9tZtx()5WT_m@n(w+g9S z11Z~$Gi)5@46MB~1{eE1+5l2teG!naTr=_g6X@;xi7Bzw+1^=jrorcIdH{F0P^^L6 z{J{frw_2UgnBS8Pzg^M{bORS>6q=SXV|Ew}hXc2{j0M8XKTd5ht=v!#rXj}*+1Ted zy$v9=C&dO7W;zf-5bBiiXsJ2|EWV4WJZZ$u9oBZ!t196(hSTn1rN)HkI&jr`y}W#@_2u&NejCes z9bFQ+jhnJ!pA#zXzcq3F_!lYP6Af&W%1# zDYBzIpp)fnB6XVPUR%943P`3BLk- zJK=^?`@jG0+J=T%MJvFq)>O!MTm-|SfVK~TnS-b>)x$V zES2ccp~IC>_t0CZ(;`zQ)HzqH!iuOuLfuA%p%5>`b)D|z_^z$WV%Ha|Qr#1I`W#Ws zf-1q%46iuP{cW|v)*-MMQHQRC9huUx7VL0b4A-r67n-A2FLp?)g|kvum$g+>Tv2^V z&vTt^`q01Zy7%4npcBrs@}0`_?S_d|BaQMZMr9b2p6=^!eSo}>1)z#~?QeV}*N1s!%u~I9A3146-4kb&H z#|w4$p*PJ#Z<`hyO(IjJd7}56bfJmvkW`Z1X`;Syn+hxPLZLaX*y+>MQSi{5tpjzS zZbLUb^b~qSHyaPlLr<~Mo4;-fs<0yHo1R#%+PaWp6P?|GK%#D+D0+Q{=oTocJauln-tRef(z=R{5K^xiREXNCO} zLZO_x=t6HqSPXp|nx@b+=@AWe9H`S?N248&$Fou5HL8>veMKZSOQTXL?(0>mww|uj zETk_q$1nW$sV&N;E!a05lX^BD8y$t7LJxVpSleBxD%-?HZyQ$}@_M1PFRs!}pIN$z z2^-sbvl~Wz=`&X+>z3U@_DKI?0HnEJvZkyiPCjdK|xVd$VcZH$B&tXnmGK z(gvlV#P-GI1}WLv7nj?M#(l~!91Z(LGG0xUHKa z7kRXaG|4giwrLSxbMeta#+GwmO4Vsqai7pP%AE~*+KC-R-B$TnJJTadO)d`ZMR;udNqFz*!M1G@tw2Mm& z2XbL)t8&m7njp=lt!bkdd0n?j@kIJK7nj%G7g>P{Umq_fg;`tJ_B#ElEEtk@33ZLa 
z>20&oEwPhbS1*@HldK5M@btYaE0!lnqvq&C!PF3TZLxb_)Pbk-1TuvnS#D3iDz{mw zD!aD0ULZ_;PnC_esI7qj*=Q6+Wuv5T97p=V^8-5_n)b*uzcFPB%_6FhkPb~G(FA!X zB~hnD6@fk;&-1)c=L715M4*!p_X&wS&l7Q@~Z-yjrgslA?!0&CzEPPmPx1=qDd-hM{JuTu`D) zHC^>3MX4-R3Tjwv)xO(m<-9SfO{=c2dal^72 zH{ApEok-C`$6jZ;`An5hG*R!gc&?O6 z@6cz7vcYqmK0|RVkJ6l0Z7Ff|JNiY`A+RFq5ZGSbhqOr?c`w%cV!if0D&;g$ zDW~baU@m*_y|1=f(pD=*-UHHm>!W<%w!Pl{PH!C-C?HCrqAMZVQO!=PvG)Br6qPc+ z^XE8@LqnF0mSm|^g31zg3B_=#&1Qr8I0dLJo3>P>5-LU7rF-Z~xJTl6N|7EtT5)c4 zqhuJGD|YSl6)31uVdtybak?VQO;KK6zEX*rXvgz0p>`=GsdiW5v=m@Mga{EI zKB918@5|DmM>9pTLZUali75G@rEp}|mlm5->{KeuT}u#^LQis%gyu(bBGD}7cM?rh z3K5lc*HLp?(wF6?tRN+t!*WqctSi=ffg4>3Pa>&gqe<}$$q=(I=}F= z!SSGuX1Nz$jAM%Af(s3b=!c%?x4l(t%w=!=m&>Km@bpS@lR{mJI|RiPAYHub>Cxd> zqI6mlI*uuG1kDlLq|&{P-n5Dnv%07)Y~#JI2BKS?d!g;*QnE|S+bqi%2SdeloV4VNQx#2WuwHGcDw4P zLx<+HE4F2`+1eG`!f9XFr?&T`z!lGgS`mffnW|!+M&C#}T|$jgBS?xX7g)q)n_oD> zs4eW^%e#3cYL=&Io5GWacJ-#C&{=f4qy!5+h5pcwR2q$?xZio6@JyvRJ${`&(~bMm zrrJdV(WLWx37kw#Bc9C=9Js5FM>$Z=eUej@7I z<9U)H7^2Q~U;DzgZA&%BMRQ6hDVnHlLTxxM8xSYWlzil6BN zrJzJm3QA0wr*K52P?qXVPah$qIF3?hj7oMS%s0L19(vn0&Dl;jMd7&AS0tjcdu_YW z;W+w8UubSRPYLyQm4Z0V!*QKQL!lPgz*4p4u6uM!k^5vhP2!m%Q7CwV=ZBu5De}5c zhzhAh8q4tfMki;MA!&!Ea1>KS6IJR~L^8E{xw>v;B_?&f?aS(Jm57qxha)jbI3%13 zsnovIwzT)v^Icnz6^XoBt@Q4YHgyqswH{7?wBmt6o<2z%eGym|92Ogw*L8@%6se=o z9uN>9dZSBplR}cG)Axw972!FG;p?WY)~CX}PObX3xO=q7qtAs-K3%6mDt#mIL2{Hv zq2LJ>ah*<~oZ(l#6S=ZlJ*3b~oh)ZqqB+h(QL%~_`M%$3eZ4yrVu9}qg;j^V+RTQEcp`jU~cF806J3(1GW38#Ax$c^>3hD~&4L9f*;tz~Byrm7Y}{tGuB_OPrFy|a*Q@;&<2ITU zRo?0CMW!%&6M40oQ;3r-IonhKAsLyB8vP}b_sRvA(9jlO$x6N$&%b4ssvG`7y3sQScIpKEb_u! 
z>fYLmEj)eXN;smPI*FsXx85Z5rBbOoL7^w~YcM>?x^&f7QX!QzO4O-~LqR{33AOT_ zzV*;#ICdqUlM@dVqAAl!G_4TH6xy9|9Z#Q9ryfc16wlE}>$KHM(Ra8LQ4Cj#B_}pI zb-ko5s?#0#jYNhdQd9}eP^U{`k#}vaZ|tAWNoPWpB(x-jM~zUUkEdBsv?Rl>`$AQz zn|*RFwWWeUCg~eT@%uv{k2IoyCh^py5gb#gkEiqbd{mhA^MP4i+Ly}~8(pHe;1uD+ zY)U1Ov`;Az{9;Z~i6&W2meb>L9S=~q3D1Jde~j`@i`CQ<~V_v^N+OOO)1V@I2|VlV9KZg&+ziYChW>3lw* zPN(U9-HWZ4>WYy#l2lM3=^naE-#j6Dekb%SQF~O`Hbh=6>)oLf&I8h+lSD)Eolbw# zQD~DXw5f?u$m=xQdF;Aceb*>L#u2K8kf|+(5}v8O*}Ra6Dve?~w@tr!JRq)H`PS)I z9in=w5*XVucM4I%CLN9jR z8`Pm3b%tliao;@MMHgFFQVPXzDAEdww4%1@NE}s`Ra-+;FL&>seDtd2qPJDcWfSwN z_Ws6$rP?eNhr?3EV0){sMICJ#Kb?-#N2ZwP9+^TslJ8yTwy3uFq2h<8KAw|M>ExqJ z@+-+OZC{N7L6XSx{t0<29#H6`Ls3Xsg1ZcrB`W%G9*?KrzNf3d*laXMebXJAAKPX# zN`3Edo({)-gpb)~G17*NNtYD=dlE-o%EAc;qPi?o1BB3hEW5ZPEE8%QBHkz|>I*7j3Fqam38{(!e6()5#Fs#~bReNow+ zgibmdERmn-3rS>A(-aPasb+1v*&PGd;1yzFMC<+06G)ded>LXciigI1{R#90ho96J)`ypLQ zjY#CV%Tv|qk_^wrI(?OWwN#!S#}w(&YRk2<(Z_flmFk^$j`M&#eL}xImghm&FYAg` zVXoRrRepj%-FDFe^^xbfkQbgrlm%6S`<{;1>AktlMxR_K8b6i4@0dapq){?eg5bBs z&0?0Vm&umyHw{FJ&5$;AZCtfoV+v4B^ees7DJP2=qM&6Mu7o34%uPp|qqZtFCiHY^ zM3&;d=tPHSiWJ|4LZ0`HM5Hg0K9Th49Xm7-ATBG8sJ%CDj7r`e!p2ytEhapD_p2u}qMw{p9KsqGN5Csj-eYALjB^ib|k4Zeq{jK{zeV*wGRifus z+2{v;zrZw%yZ1pKPm|$TN{x1Bqgg(Z$m>L2=VC(LU0W$SbeJEyMML)XjG)J2qVGDQ z4mzTAMvwZcUr__^A!^{LGvtANU`PI zNIK{zjzpBwM1eTXGL~M-?sUj2Q@)X)zEg;zLMpwiuKR8DsE=ERw6Llv7UNJU6r~jR zSI$$3-dHM?x3AaTdplX3cs#Ca-&YGEQ=o2b9d$~P+P%k=%V@=kilpdux)Sj`@k;3T zPG=I&kR+<0Bv{&4orEPJ>5%(Px8@9KUnN%D*p}LET$j>x5Z9ez_|E^Pk((r8IEar&RD%W;S1Y^I8-vO!ZQk8agh^>|vEw6T{<`*OJs z_jT7674A!{rNopPi98f)K5gHa(ABC3b)|14qs~Teu-i^wxEtP$}G3vWttG%|c5+s$-b0E*@lsdPF zjww<+ZBjHx+k_fP<}RJ{lD>L-Srw62%Ng=&iL{&{ub1|PQ8_Q^yGh-Z`VZ%6^ivU0 zHYuX0Njy1|z4goL?$mw9qYqn$oTyU?mc)H>^c#DvUR{nUCpfz3M&E^Wc+J^+n;X4x zeS2N+t}pL5kH_5R)W0KJj>PjUQTIA7dvEjy7y7l@RJX<1-i@Au<%)0I9TwCS zNvF}Zi4@;;bO`!J0$WuKWZf3$r0}KvJ|EBXJP+tBjjD=O)z!;2ekqce zB9cf^Z=5)GQfwWSay&gCl&1RLT_^Ka`LW8FqS~Co12(5!g~&(w1t1I-idhYhR87k>dCbMr~m~VAK}2+N#^C 
zFltK@;>J#eRN_MVbSlB{8)z-nR!e$*?Ao$Qf6{4BYQHw?vS0N@Z<}+Yq3NBv#rkro z>T0)?DFA*qyT^K?8Mjgp^=CsiuLU5eY~Rg$RBMEB{ruPesBmBO^I zwpH8LJxEkaDN8A(s9P#^<#iy?d44FB(j<+gi)yZ{i2Lk1UUyH2KC`5jq{`k$#g(wt zqQiaN7i)80Eqia&Eml{HmX`~JSzFYrc2c!H#f~THh&u1I=k9mEtnKI{i6POXkVKvx zPYNlzjW$KnDTW7ybV(#rteYCe(pd_zbf@#%VhP+{$^2AE8}b7;ihkUuzilI_Qkp1e z6grC)iybB_r^P{?szAf7GeeaUHICoO>wr3K%2J6$-*l){Iy7g4xh)h>Y&Z`0gl?@? zss1rU^=j$ZXm{ljq-1+lck|Q9V?wQXOsEwdDmLb#gG43KpHTQN97o?crbv+7s4ppO z1Vd8?30R_8t_kbI1u~8wdXgfECV{zLDwXQCtgRKR!c;BSWmVX#g!QgOUSciks-&cZ zM3d;wONr*n5+Ed6hdw!)^Qz~N*ULFqI+`S%VyP73VmN(Dkzu!aqRv~XYO1Rjsd}|2 zl^PR%#%_F@sx8$|nn<05GzfkqbC*v6b;=UGXsfW7%Jo8zUkc0UJU^63b2yr#Ih>6P zP&B1TrH)pIHdDKa4hc(w7Ey(KHe2O0&w+G^rbzMh!mh2B)!lE5%6`PBgrF3Zx9xCk zP*RDO#HUE452i!X7SC73#$YTGjdFe{C{k*p=SH39-X4&Mq{_uraa=g5PNS?^ zuNeEHSyh#4vHDQ+TM9BmR9TW|791A3eF(Owo^{Pwi?oud*gLxrPNrpc~A*1cS??WbeyxTM?d8>`fBO4#?A<5Ku%n zI^3lFjRfRQ#&ASg{X@r)v`g_j@oYlQ6e2IKlIz_fPp1=gomUc)%99sfm~1;pT&Ydx zY4nn!ArI&2Kpic_T&=AXMw7&pFx2UE##E~nWB2=FVdY-e?*}Q_w)Taos}~9Cb_q?j zJYAwUuA`-pLbtlj<{NjOAgD~*?4H}LvP;h zI(M_)6vcU}+PC`L$V~pd$h`Df;wa=lHv7sqPLB=%TuFl zZcKXm)}b^@C}fwur>p+Pb;!@>L*&(>p{adg?c0rC%2=-DUy`1bEpC)V@`yTNNTZ>N zlp5{wB%x=A>on_fc5P9qHVR6nELbZ|S+GxM49&ZQVmKXz-d;C#(NXAaqdA*bebH0s zjl*%p*!N92u-N;mFQJYefm=PHoFpXpji}Hp$(WS4VX6hHHG;-ylpi#n~stg1?F_j&$2EQqt`mW-Sw7PgSHiKhgVUVN z3lOb;>>sbT4fPKonumT_-BumazKC?1(^F_5QfOCtq9iC}9PND)6%u_y#SuwFJrfkk zdt)gaOCec`Tg-<7ZZR8;ZqU>j$MPLdH=b5nTwKUQL(au`eLTQJvtp$ zzUq07#!6;0PS~$G&l!TXwO6@QFdYo?OF~SLgXik&@wEbfCl5Bm7>8nFhaW~LUGOg!arv5jHY5;|ML2qsjcOCo~2a@&yg9K@prL^W$M<0J#8K+|XbeFrkKq0840@XD1Lth|I*w z1kDmkXwUX=0UR06fC%lcK}?8@Wq^eC!y#ov#xW3pmib-mpvnB|6*QT@3<2247zRdY z$7sNUjf`KQBESp{b^#IEaUQ}(#x1y^Wqt&7K$H2iIKT%qNo=&A2LjlUl{kPGn#_*{ z4m6krIke1=1rH5gffCxW8ZaCg#}$0gAVGw73+Dsz=n35 z2bclmBZ$zBLl3Z#u?dRM#OR^P{HUhM{AU9+xCBIK$3PH+jZDOeq8Wk+?HUz1Y~&({ z(Ec!xY-A&d(4KW5Wh37RKn^sSAN6W5r)7SW_=E8)z~= z6JTNP39NDL4zZ(LwhEKJ?O|wXuk*y8UZxHa!cflC<+=3fgIYi3>t1^ z`~V!<-<4rR9>5aXwNVO3#ttAt%lsh(XmA6D(2hSQgvb~S0K7r_Js{9zeost;8K9j& 
z`#V7B0HMkJo*9}25J1cP9v7O-g!X$wG+&5XCUYF)#Et9)r~O$4cI3=899$fBTIM(tVgNbwbI8zSo~2dX z$Us0r019Yen9w*|Xo3hri-B6^=a^`!m_p0^Jy4*@L9|Q;hxYf-facYpp#41-pvn9_ zc8~HsrqH-Rv`nVe{6V``A~I3~ADSezzlQ-d3djfn3}cF%IetS06p%B2k03NKmN{r% zqn3Fl#vN{C)?@~nBedfx?7@ia;%Lu;kle^E9NGZlKpnrakkySm1d~Dgd0aq~`FWT$ znd1?pTGo(gnP+<-WmyX-v`psb5z?#~?U{=nvdn5i%ltf+(7Y5tI~IcwNMsj`gD7Y+ z&v;xQiL5@M;XtzjK^U6MzY)=>oY4La3r$NB+A|SkL{708+TR2!a13(hIHbgloB+Iu znv64`{TrvrJZpm#vb=Evw9GRic93NRU@)}IaYzSQ&H^()%REZ~jw~PG3+>;0L4$=> z0WFic_DRBxd<7EPu?dK9BeP^0(Eg1DG?{-xg=S^bGXG{oqq5O{5+|O>nV;mP$y)nK zSTvb`#~w6a4(%A_QzAzpg|;J>gm(Pni6e3f5$)e$(xfyRX#b9kCi6of1r3TJp#4y^ zn#{8qOu&eInrN9HiXa-ZM~*C8!9&aZMUu!eYSS{uJeWe3%Z--FJktS2mdUIg-@yu5 zK9OjdUkDj^3?rEz3KPwUF<_z*f-j)`PN1P-VhQcoi9RHeM|NnL--$CcRXm{`m*ERe z|x}kxZ1hk(3BF(4??dJdy3^4L7q5TBdXsGz1 zWqtxAp_xLYWqu|Opa~L+g9ZplJ7xk7JR_MOiI*nxtOplBWGQlJ*Rmjz$S0llr(5{Q z05H*xtx&?rHiFPHKN2R*k87DKXr5iy%j&vknW*hc74yPguj)!!7x#s|_Ud(4Hg=ZD z7VePOeOY5eRO!pAFsfS;d()Onb-kqT%B50QXPLZfi)(e)7Yr72-}dFYYHM|A*VS1j zE0?oOSSBv(ERz-Ho@H{DiL17-ly%QCc~jL{CN6iCwy0DuB{5coy<)so3A?)X5LNoR zyK~>wwYJU))%vC{7j|9GGI71Gx>DWrEEAXdzND~K>Vn1g%j>$FWzu?C)q9r7 zYIUjas?y?J5rMT*7?#Ovzt$nD*Q?d4F!x-B0`y;L`CDLKoe zO@XPK0g zs9M(6SthUb)ke?@>3#+!U5>?CETD>vqetETsw9YbFVX2CZxtwL9vP@L3_QmBA zYdOn=Wdd`hFUy2w0usfPuvBfzGHKV9%QA6YY0ENUnY=1k>T0R&+oo#^JIiE<>eV{S zq~$CVmdWd`EtZKZ#=fqLx_WuFV$L#gTWk8Ns|}02tg}qo7v@sm^(+$?m)BV)ER(ZL zT&n9VlUHS#urIsDN?vwdxu~nv>z-xeo@D}isa_SVRjsYcsxKujH+8vbjD0EMnwa=7 zF}VsW@}_UPQnjqDmh^33u=b_0D@WXe5h0}K3dk6kJgR(xZ7Ia zn2%L!jkR!|uG2wJiYM1Qt+xjS4F8TtpJ3_p={_D(gE}iv5=51#l}@jO+XE759{S^b zK%Gx#-E4aGa+x|>>y1>lZptcc^V=(%s!o`9pAK}Q;|OYWizgVK^+erGK8^z?WjN?fUKN%E3xl=!YK5snnU5w#MERNyxcDVoaB z#bt9Z#Bo;a>Mt)T3W`~xU#ClQm*?BQP7b6!Pvmu?DKtrB=mF`@)2WnX3YgMcqpfpV z#Bmx06}eJTr$bOgOY$7|>BN8}E+iTSu9&#aqsLN6mLyq{2bECaI}in;8x2WuH@-{}N`DW{Te^u}I)r%O^4(G_&1#3YeEbL+0t z>E7vR^IR$Iy@{C{O`WAqo5WLlozoW2@^nv^q}Npn3B$950&$;kMCq~Fo9>~T&>YRt zJ9car7nhFf^uNM?XwHk1Zc=siU?og_PZyUMj?>%L>AWk8N}*u4Lwg#C=D=d_S;S>! 
zV`s@%E&YY1V(E0aurHN~%c6j&sB)ex)e9DSUgy!BK1-p=LVl;RM8kg{iKM;qxoRtY zg*2)}34a38D0wo^@Krk@Poq$8EKw-b9!r(#qT#u|E$kgBo>XkirE6a{)FFz+(}|=e zKl9fdPIGj$S~1oKjJoonBPAq#(>pdTm)GVbg(11G17?jWKs8AbQ&xm1VHTZ^=XF_d zEqk}L?7c(tTdgnH##BEk$rDAD-WmGP+X@}SO=4A-YwMJ3G`pTi@h2KpqM$1wd5Wh| zjJjf1&T4&ysgPFer~Ii7T=6DWeD2y>+@p%BytbuP6P zj$8+$wy^R8{!nA>%QB81DXOmZ9nxxnyk5Zq!4cDKt%{uhS-lR&BLj?ThOQQnGCuNl2eeu|$?yY*+Po&=Bmjcs`_6LLjXYl6%FbEo|Dd zR~IxtUZ**p=5#d2qd6bV0clQ1b3~eRqB&g0={k2h?#w9Y@r0PCCuu zJWl6vJde}yI3ADl@i-ul6Y@ABk8|Q_4%gvy9gf%GbUGYQhx6%hKpjq~!x42jCmqe< zJeI!vTFbp$|v&;hcOl zhXZjs5XS>?Iv~y2n~m#e6_8Zg==YaZ#k9oSjiTYnd3w_>E-%ijo+UR`RIe8GkKb#1 z_oe+<@2axVFjPUqktwFs?S&*#{Gh(jKz5iTh?3sIVm5lO)1XN2l@;Pdolz(h>Nrq% zj%RpevIXUUL4X2~fQ|N8uQs32GIjsuw;;dGm?lh_kuicdO4akN%rAVP!f+{5&BCoNd zRaFYxP6CqXR%NMd)$1M4m@1K-s9SA-bfL4@R+~*Lg?ZE6ZEdQyz|O~`Pbtz(S?9LG zTy`rZqH><(J+Ic*K29Ymj7d)+hn^+M3;Swq{>EOYZ?;#$U9 z%2KuIUfouGNf&Wkw_)jtEX}fnil53Bq@;b{bmOWd4;5j^*Gyy3zL1|?jVO?4YUE=m z#?Z7d@@xw%niKLd4iIQL$j^=dqsbs22LYmiApc2-gT{gUbHE0g82MV}09plrFd{@g zwxS0D$j2r?GedxZ21fq*poXjubC5$E))+YkLK-qS^B zWLOya$54Rm3i)`49I`3oXIx-ImPUSt#TpqC^0O;KSrT%Vf(!*28Tq%unw?Mn1PX>Z7SA7J_biDhuj7>jQkTQaTxM(7&^dU7eJ`uom+123j}^BR?7+#zOwT7T_z4{8@Mcwn9EmA_lk$ zBhPpM0aGFWR#@>AMxH$(2`q(toPjamD2yEAK?xWN`S^_iPGjU*5?=g-{LdkSoiK9j zgb8j!{!u7kCX5{aU;)?%`4|Ulz&pr)84kcM$k(6%0+%2^E5H_bjFD$aPyvHr;f_13XJ?DPynXD$TKN$fG05We^CUMz{oRh<1j}41FYZ( zj9hD?4;TU?&(Z(@{D6^X+XUDFBhOME;0BBwbKnJoF>*{Yf*CM!OajFMMvjHpz8LxS zaKi|cFGs#c1s{^gfdh$rJjO@}yaF(2V&oqQ7RaiQpDln5GU!2OgZxYgR>&xjuSMZv zGvsG61>h~@XD~ouD&*rF7{Ecu$23sEJIKc|P{B3GzYzFBo6r+ScH)` zfSwp@Cqz@T1 z{sDkQ<^uVcj22`lkgst>WWvaQ9fD*YkdIZA$bgZ5 z91PI#kpCoHL6bv%=7AMxaLC7UfN5;Ve-?ev(2$Q`Q)p$#e-$iR81gY2*3i6=k898b zO^y7Q7SOPepOIT=Rmjhn*qRjb@e(9j6Y}4gK|>=Sr*Q`j3Hk3s8JZCCFGQ*RAV0f- z1MLR+nhqOU4f632M9^N4{~+F=u^>Owp@vpQ{xwk2$jH}7BMk)km=8*59mv-}pQeHQ zQ^5)i1Nqnle9$0}e*kEi9kMy(|4T!*Mt+tC6f!sDXJrT=6GQ%Sj503dXG2gxriJ|1 zfy}6ouR#%nj0yP~>57a9`59>=J0m|^%gAz&e;cuk2KmPV7+DPRKSm9*6y#@Fm7O5} 
zDn-ahkpDBdAPYhM0YHW91Nqq%VHQR{MnWokKt3KL1{oOn8i<1NkdHa=!F9;TM4SQ3 zAwSE*hvATaAV_c<@-sEOU^V1_B!bV7e%3o9WXlOaArepW^fOoaTqKmhoR{0xua9OPqFV>9xzG62Cc z$p3l(?1FrZh8M63@^K07fXT@J8FauT$k%wJc#QlbVG7)Vd@Oc21Nm5sm6-ewpa*Y0 z4w13S$4xjQ&BreQ5RiX0TCm8+C;(W=$D0l_9}BrcnXd^z8}qRiM~w3E5Y7^g;N;^r>~JFg5`clu$58lko(+UyCZ9pFQCM&;J;5P=XE5 z&kWE$A5(z@6ao;QpM9i7jpyGBBhUYj((^SNQgFc<&wmSe4DtMTM9;@d5Wxz{sK5oz z=Vvg;q~~J?h|kACeBk6qjvLQE8grmP44$8LVFnJ4=VKQX@qBFwAzgSAkbxmQ|3ozL zd<}>ho}ZaP2+zL~RGyD{5Cbu|DbLrEP{H%>2H5knFr1#BH6Z5sS&$Y$K|>~?eEz{u z2R}#wKR@e?Py{(VALk|32m%gZ>GLzxd;Zs;0u|54a2WFZtaE_pza#?B$3IxPHh`!D z8Vq>;lNg_$jd&bD6Nl%&0v{4NxWbL+Yes<2&-hS2AG3h)dvU@yi&`&qx4)G}Q3? z7ZHTk6q3-9!t-$--0*x1rq9PiRGy!$fkXu=czzZH9GRw z`R5`E&;JlB&(GA*hdw;u`7d>zpE+OvUFZ2Y3SW4BrZOaoE<8Uw!4sbUC3d)&@cchP z3MzP>k4>P)^YIPz^EJ{Ao{v93fGU_F_JJL2@%#rtgy-W70C_$>!WPfZP9_^fpawBK z|4QiL`PmAvkYb_>&p&Je&%X%*&p!!5&&T5mp05QlgXd>pK*95~Hk>^Fcvw9@i`l{R zvm>CM|E+rdSq$*}Bhi589}FctA49>#^KlnSo}axz1JBP$XgnXM`1#l*jpsj$7Cb-0 zqVxP)VGqxL8)EQ$9CL@~XF$mC{Ok{1JU>%I3eSIj=6J<}5&6#|0!8G^-+?tWVbEj^ zP@)N%42&c?z=2kiF`x?V=Rk`Fgk#fWGWZW#7~0PP9Gc8A9&T_WV?Shg&@#tp47rhU zAF?WFnV$n#gZYp#LCgFbFoR|TQPAK$?9i|tf-r)1TmUtIcmr$>M`WCbCA4Qv*x-na z@t_Gq%ls55fd=0pV+ZZ00829l?We$}$@~+jg9Zr=+W|vC%lr~pX>c8I6SO}JJ!mi; zlF;xRvL%#3gXNH+5z^o|WI)iuAkpA8vL0k-WIM>2Ka2oqFdVWOXqi6(DA3?HU^8fc z1jNu}{s_1<*bShTIX=S!vW(jhWDhjZGG+t5g7!b43L3nID74J~00U?;{~CU18LMFk z?SFs`jm9C#GET#ZmigU)MwT%ew3ay*029ib`5h3`;4_Tc-+(-5uo()VJtIK?N5*A% z(SAPcLCu&9NN7J4LNG)Q2@wQM1_~Tnrilg&NrNWygP{eD5I3~r6f7d+F&qH^0SL*N ze>HQAWUv^J(0(wMpuu4#6p&kDrgha+%(4l4i9;|#a-a->v<~ZYq0_4my7XgOISdA|@ znoI_3LF)jngZ6vz1Wo4mLIoO}1t8ip4-~MwwC#0@Sy2Y3WFwdEP@&UF+$`7n#@l` z3=Kwu3hgJd01Z|Gc7m4qi3Fj+Nx*1mnP(x;@C$P0hY`aRG#Ck1Xg`dr$^0-&Xz&qs zXn*Vv8f*lN0_~5HXtFL!phU(+z(>&jcLtib1R6{P>;w%9L8Bcr;RGWY4?%}^Jb@V$ zAY&o;(Ej%V8jT3;83zM4G7drx?HLp*92o;4iI;sf_y10nz*0$`W`94pN}JD~hyWh6H8_Q=odP+}cQyS^ z0ATnlqX#rj*6M~xUy|*H8_@sJpF*)lgCkIMZ7s#joho=DkOS@_o>#PIhDl>)tTf6D zn=~5D=t;ve!f4?>R9mY8`KhHQ%Le`N{nhKFH|aGYFnF*&3eW2I&h(kFi@>Or$pCh> 
zR%+5kQcI%EYVhwM`EC7Y4SaYm5gB95kXeR4!Bv99RWd;5g5(23~C9Eq*N3mTU;{-tMLTjz~o3A_ZxGpnH=fcT%m-4B&st@NPM z*Q$|FX{{P6r`AfL>a3Llc@$;{&f|E;MAf4lh`zuPA7pt?%c zKnFzAp)r!=-_|~gyxmtO^{R*3*&BQ&2mYZ`F$78KT!mzvtRYqqRbBtWhyI9Eulh;x z$D(*cBU%4@`CXD7z27hmjtTHT@V^mOdiKlhm;~-qxZt_L3SnD+_lZx_`y1001^{TS)-3;ciIlgw`tX+AC%1=S!gK#>26n}JSs!+A~n zbi{uyChs#TR`3HL&j6*k->kQZHx~x0Ur+W2G3-j&69&j=47xpk<}PcJcju^yt%T?d zNI)OI_Zz>04to>{;HytvkpMUt!OeMJQk%Ms(qMO1doTOnWUs9k`UXLJh6anR-&>!4 ze#k!1Yt=ZN?tQe}T}Hmi**TmY=r{@}_=x{1$w>=yjGO}~6eU=Z%L{XYBUVyHP80EQ zTA8=}Jo5}!KjPg5Y7jm$@(M^);FyOe?X%RHsUThvR5hMpAy|na=rH!|8ap!U9>@ti zZ)HsZ5q~w%t!gG9qhkF`4vCL_p^dC>y_(`NkY%PXV++No*8_R;=ZXJO$*r96>5t!b z{O~Un63$aw7S$SjaTKqpg<-**Ii*PFW8MMcKHwfNRp$qQ#JO`3<3^X~*|mt&tB{FU z8|dL!2tdmKpT)DsV*Kl0K}}3!*F#uT_1%whzsHSBHGq+LL%9c7eP&0FKOCW^L#Bqw zj=)u&tZpYevh{c7^VD9A9OafDH4Pz$^PiI%bt5w-uZX_n0nt&k?TzdhR#E>C2}eZ% zE-#y)l9C$siU9Yiy;D)KBRvA|oAg6(PP|Hf-hlSQ5n;flQ06Nw-r+g#8bXdwWo_fS zdt#mAP;bxR7d$LmFUjh+&y{0EjQn;Xl+3=-cBEb=xytYe@a_AQ{fRNqlL=~YR)L=}yG%37dGqn(NR7Mn%S;0M z^1A-0=bfhEE={1cNW!0PGz_#5VhR%I0gT!97HGsNf$Udoo_s>B;v~gc6A=f+V9*hJ zM#H*|iQe4L$I|Oc(I(St(Q(Is+rMZ_i-Ye4P1}f`jNZxqSsH8fc}4sjE{e#Y&ABXW zZTl2Q%lS5OJ8};SL@XyS;j|m{lMLj*>`8|Zy4V^x~ ztMj^tTh+v>n;SJ#k>`3OAJ2Eats>wq9(=Eph8^XJ# z@1QYXFU}+yVsD=;KnTE62?z~4J$zx96twH1BNr+oquvGH(>}@R(KMiyE*~ZV)vVcm z7}|7AOT3mLcPXBcO#hod@$Kg#7<@SPJzx*gi|(u9{LDGjeFHF!lhMj=LcX_tx-yuR zTV1_j7LaeNt58)T0}T@>@(VNhs6X!IB=C(9b8qRPkpDZD2Af1%`bu+>F!nh_Qc{#NEjOYg2C^wynv(-zM|5P}DW1!t3lg7*f3N1q zFtUMK43jyhQ-CPl4QppsPqIn&pp5pS4n78{s;n`6S5!;9qAEAOtO~~hh@#AKxZ^>w z^tx|MOZBd-i%@P3KY1W*fR2sk3FyRyq@*#RFlo+xkd=e-b9P}-*>CHp)F(dA9EwXz zK#WLeEu0Pno+kA(()9pVEBsq7rj=FJXNU%76yLK{E<(hGNo4}_1TpUq_pdq(F$}9(ynKONGZp$;5R3XFxJ}ty zhZb{8YJ>#<&cOS#>D;Cx`66sv4M0r+04j;>V^w!LIXmiGOJ>ytaMgXp#eaa+x#Tvo zZC)R^Rt63ISZyZ9!2mqkr(NitT>N~+e*QCxW*h`x3t%)$Tpr=lx96MdOBmxg(a}h9 zTcfZ(p)A(<9t;=bl8)M8K2{DFDd2J!T}EY?*VrY-kr?Qn`QK%27JTW5sdOZFvTv}M zt1Bq&qe@O%tcO4l&ciE@s+MHwkU&ALemrwSR8KG?%Yu^*;T{?Hpe7lH1zTh>po%Nl 
z+tezF-xC0;yxKiq56rV_8woOYV24FnsKio7v|OgDf;)bU@KfP9hpsyLGNDs4OBX}I z9bUE?ic}p!FFZ`F;11GeH2m!#1Ki@&Vx-i*X}V&W9rzKLkb!I=x{|2rD*!TVf#w@> zwSd~34@Kc?VR@s-!MYZ0B}_04Yg}nSE;)zOL@-SL1u={#*sR1aVM3b72LZAbodhB< z4PWd$@Jm;{qD?B3)@91EQCkj`^&E)uaw}Zkzjh<#sYHRlpPl(ACiHXErn&gI>_h%s z`{^#G?z&cWcOfVAC*J^0gyWmyA3!w>#&zwgT{cxrA|Z6sw2jHql^%RBMl`OU7)mfT z?aqBQ?lvKFHtQRv9nBd5ua1HW~y^A!b5yaIZ|SMj)(nT!wo9} z1ZUFn2U>F%68EP}Qq6cnSmw7WK~}OmAke4;W#ycJKpSRt8o1V*lv-yAV4t}URLCJg zq_{o*^?{1rrHf6o*Rng7CpcLerzF1wO-=DIN#bC0k!qz7-G}mgUag!v4AKz#!a>wz zcVNN;;9sjcaY@4#B4XFBJ7bo2LRy=)y$|J&;fN6xq|w+z7LCl1-r94MbO_?~I9Xkh z!b|aBOH`i8kiOwHP>_4w7l@PtwHd>rz6xa=wv9gg8Aq2zH%u|(K^x206eeyRkh?hK?2tuIo|z&Juw=y z2o(kh7k@}2K>dwBq{DWgp9Xo!_&IHrKTqg3rbNJ0&WV$x#nd-=*+>aXRTlwZ1YqNx zjL|3HICFseL$BY^4k9(Uz<2N@`=}{t&_F~LH6=jJ%ZR6*OboOrvmf|o5|A)>sl2A&YHLanTM0+XW_omY8)`3f>u0o#24ZPxD=63IP7JOvay0n1J62doK-t%&fYA*V?6{oGF|euMf31HVmv1EV z)wPq$Dvt072N`Q_HSKQ0!v2>=`wob6w@O4)>3*qo*{KHt+aFGfZKkYYaYb3S@yTrV zd08Z_H|NfY5#aiAQgAhjfUjz`)WoC#Q@eln+fqCyOP}T&JX6$uq!4r_OTU;<%dF_@ z5MT1sjW>RxLmDisOLoDca2 zgSEw!g=rbgs2PM}22IO&9uBYc`n1ewL*+ALqya-qSUzso4uRd;lJ0TDSCLm}3d6TY z)qN8yv&0^A2wY@=fM-g(zW%}(7}E{-xd-O> z^ME&*cAm$l%>sOi#L1T*Rjp9R#e2yO?rkkdS7BhNb??WTQ>o^0!W)H?56N4d_8S%d zl(DsOiI~L475B2B5TVJvtjB5i5v47HjaxvtJbC$qm||-w-kkfk`j4Mc28iPG4iolr zENSQ}Vvlza+^X(;Rd=qk>6_P7CrR#Q!iJST9q!SVH~ArrU7dbQfMPa(vycbjN)nTRdb zZHxtx0PwSuBTwt(^)x>Pqfz5}s3mfer>dhI018o}h4Z8r+6BE-)nylu*Y>g3sa-hC zcAEO}K^^$SlHrHwZ}3qnH4VgA0C|7_NLz!@>&h2iNcChl!dG}`2DlMSZ19B*@RNr# zzusaQKKFF2BQnO!4P;XfEVkL2_CpFPn&2E#xNwc%2F0 zibQinY3N*)dLeda^i)HuPcWypx~(a=xFZz`1HWv!`HLft%SF#~L#oTX2nfKs&uUlv zK-cRXfSyJecBzn!OZ0_sr|6B@y&GdgK~UNg^w+-*<)(+IgsFI!E10<;uG(sVtoXH3 zt8xc=cLS`$+enJ^tplSTUhPI@6BBD|jC3-x8BhcaqbnEtpSxdApj_ zptsSE*euVr- zJ1!!fNNap1Ytkp7wi|BuY87Saq)73Dn;=Z&#Kw98x^Vex%o$O1#m7wgVK9W-Z_t*5 z0ash*g7H=%-%v_}xVs!Ju1p2+@0n<#?`Bln}(rR$em{kIKAuQYf#_RQZzoMm^Knr}$7 zdm0go=5y!De?}2fBEFiJ=#O*x{QrQ7Fz8sSneqtrTq|9l#6$<)+zxFT#F zv-e0Kb3m^GpCD?U)L2vD|Jvr-|9lbP(b*=)hLnhIh`L)ujw);SqB<$R0uM$EWWaV4U_d 
zUTZL6Z9dl&cZ5Q&qb91_0{?gbD$N!by2Kvd1-uc$sUS~(JG6iK5nVm|r`wAQGZi`^ z^TE&#FA|&zPg=H@gB57zrAa6UdU*60=<%WtVxj``wtMF( zg&1X}8F>?QDBeiv6}-`KVi!a?K#%pAFM=*vPp-qVTav?9fNI+fL1(=wM!~wFI9dQm z^I5_Czn8w-HX#Hn4zw%whx!i+-z;wOzGWQ;q2mT{^(b)_SO(Ewgp$77i^_Q%BWV~y zBb}He#T@j(=7vNnULq!yv+n;-T2!L>60D+^E`;A|As~*Up8n{7eF3rB`eLn3^2)zq zJfE8+C%c|~O`Ic$>^RJdbh<>7Y|LR3yl4rB??V4_R}q0ngCR^b-Boam%hBYWcKfvH zy9IE+z{L|IC*i>sbrY6ef2FKIIW2=^vIFDb+i5tiiK`K!b*GcFPXh|YgsRD^v&~@B zuPbLG#H9MQPF8g_yf11(6PNhlf{VivlJSJxJ~eaTsw$6ag!&c1(9uKXXc&l!@JeSaX6& zaD>ZniBlwWvTOEzKdX&Y>q&JSCFXLU%9*l(^M=5khjpX^0X_w^0O;pm1xvf+RXn*Gt={-G9l9IQtfXLxtJmy&CfhiH`S*XNuquPq`179 z6>2fH?mn)%#U1OwL0wi1QU?V4qn2+KXGZzgC^=_^kOrnJxYrEn8iiAbpnO3@QgY!0 zAA1~*JrV{*?t2zQ$C@Ia)4yj!R`GEZ70QizFt_n;G8t`ioQ1L+U4&I|wpW+w8bW!g zmDX@wi@*Eo61a53D&C72mShJA_Th`8l>&-sl&O&xNLCH z`Sg8F*Pcxj+c+=^%h|}VGz>C9E&NDvSX%9GrpDD*uI@rzI--W#L@z=1wH%4P0i*oHZ zt-{Rfz|>QQm&n2`A+Xd)YQiV|lciCyHCj=xbi{KAl(uXs;WAANe@w_shAN3hj41?~ zU{I@+!P1O+0!((1C}~dc$vpSkl6#;hi*aoD?kqVkgVQb(`8aed_YtYqATA=YQ^-kz zd!X^t;j@TV+AQF#c_HW8L1GVdVEr&XgOsrIfKoa~&PWd#*?__7(GYNkSf7|>BoCKZ z^1`DE;6{I}bbvdAQh4kP>GZq}0T;3%`)=%djc7U2(n4J>GqNiG_s1`<7{LAMH4+lI z{<14YFLQ>?8ilQKD-yR{9pjkzlxHh5Y{FDSr7?orVQ4v6$nFWL&ViVHj|h_rQlmty zMUgO;Vac)|C2SIcm@kx*k^HPylHOdEpmc`d@>2&l^4uN3&*MR90Wil`R?(vU!Z3*~ zLS^b0nn|M?`i?5^nnKaK1kjx|ianTQl`1@L`?XB#p~xSINkh0DeYEQ{%Nfk+CYrUR zi)?ZfAz}@acTSxr)=VF^m<)NZ;U6F zOCIYjJXP4Fb9r^s;q}OBNTp}OgasKG2UkdTbA>0JoB(YSI7()9N*N`rM1I(837Di- zI1X5K8df7h3c)byO;Z+3Rz(_}d%nnEM@S_js|v=i_8ZpmUF(&pbqNl0Q0vpT~R+@(q!3cn1sTXLPb`+p0FA~Cmi>N2NR$g34&-F zE{C*fw+1KPXkKHeb#IZl(l;O%ZAy%-?7xTHh%=G1?RzM;NwzV$TcPo>gz}$-+X_$u6Z8l56%-ikIN;roV^O+M3-Bj#9#0lr-nn2Smz*R=E+1K#%*6lsaazhZ(QjY-6p#0S9h5Fx`Uqc@wzD*lwBX3(3B?lu+&HaNWEetK z7en-T0O?Kv2y8oWh#$ajA`Az3?-OnB^%HQ7Y?HFOYXAq^pTWD=5! 
zA6H}7DVfnk`!gG?^|a~ZWJZ zA}!(_50cnkY26h3r~1l?^9l4P&|#^ttFYp=eU(IO2bX^EEsbn={1mf0tzw?WpQhgl z=V)7e7#yUnIBB-`cC3s_m#H{AaZ}xUOOx!2dDT0m?D>+P#y&5X<2X!rh6gt z&AQ?{)^+Hlh@VFYc$GD*j9>;06rSoxPN?$wXZyg0DoW_;*G+@AjaC@Pl_pQ0s^sv)JgG$*-#YkIJE#343 zHH~;`_PcgMIe?l=DLl=rZ7xI0E^d=5{w3BcmJgFSgTzRXF>|NL-zo@j+jff0Og#;o zW|XQCT`NLKmj5@aksWY>s9}nU?}QKH5jxwL9z?3m7WEfIFC`#edi^6asyCX6)4a?4 z9+M;e+VtpCn!)ia*vg-;I&Jr+RCiFm%;C#J4g3gyp+9CPJRYL-g$R+rx|)xqmYYs2 z0!?4?8LNJtoBjVkDe%WD^y@AG<7kB!%(Xe9>U+=_x@*?uNPM;5AKuKqBEyfoi{!2I zWz7$0Q2sAEJ=5H6T~1)8xEV}L4Q6v9wJAoc4&H{_?>A-s(?B2j?u;hFCTE@Bq(C@9 zke*6!iy&!d4Zwzyz<|st{t2@PvSINIeXSuZkQzHYE8TGCweY}ZQAq9Ks>i-V-M;&H_v2oZRfu=-w#b-b4a$M0uurdTFGtd&pY_boiYo=cwfNDNL+t9t9g)p^Wi>Bp3RqIShAFwC3!gk&;x38%$@Ogcb&Gr`#&G< zKn-Q1hu(O{XOa*EKM)L8yt^U)KC$>2<7a+nq)(N z2i0jT`g+wVw?sQcYLeaboUuAICb`;2_6Sz?4VuC$`{J_v+j}p`<%oNUMX4mZ=lE-6 zxNnZ+nc`i8*q@n*0ga`~ahnL#of$W?bvfH#j(lmyJ^UXjc}KxfdF3O_=|LLcS(`m6-Adr-q(ia)y*niQs)l)_rEE9k}&K7maKwd4t`RU`*d zS(I)1T%p98JE?5f6IK}~PC*91Y;A)`!4%eVEzGIAG7t*g86QAg+flK_dM%sad+zMEMf9f&EvR({6w)&1W?s&{BX7ajsxbm0tr1+R!-AdYpzYZ`P_M@WmehxGzDIMAi7PjEjZHf zlqVdLmo|<6%DMn(U^@#3fe}}+8oY&b1txgmUkdJ>Jg{j!U!`M76(W=Fzzx*Bc7RU; zX9K!}kwnrX7ye-m20}Fx7zjk=c(5_Qp|h$N1#8ZyGy$dm?-#)9WwI5nppOl@e#rwm z{lbV9lWjyf$yoe|phbG`&8ks#ws)nEUUkFQHg2#rV&untm9-0f2GM0*%!(9~-pZyU z0uR>=!omCgev>7+>7*(U80G_VKM=o&B5(9C$Fa{itzBLUNOARi?W2)ev>Du(tP;{S z=-RYy&xNP+ZJ}7gTr!U>!I0E#S2H-BP1i{vtCjL!#KW;NOeCm;)pxuoQR ztbeSzVi4*`@dLZApQagw3ILgO!gST)Y6QmfC#>AM>*g&^Eph-C6!gGuOcUYTT=f)hhpcFRUF(@e~(+8A5`vJQy`%k~gJmK(g_^~G4 zaF9|&Fg?A|hA5D~E3bPe`N<;Hla0y3j2!8>+9Ex)@* zX!iB8u)9D3^m>EmNUvbkHDu>js#wtnAj9Sl1Tq1RZfz*tN47iUkg(bOrk2yhzZ7ue zeJ-?WX|dYs-WR#%Wd%0+IDYKLkS?k#Tv5t(tjac-lCZ!b3dYw*2t%B_5UHOUauUuj zj=75WE2g{27=PsVG3YS65{ieM(V5Y;s_~!+R`Zk^LUoo=^F#+16I>jmps$#1A#DHR z=GdecFWnw%ySD$kSP3<=H7q2IyYDQ0vdHjqIuIc~)X1kKKZ54=a?y~dgYt2)q<@rshl5|C)sV@jH@Z+N0~tX)swoEi~rE2>SV=frHQ5%NirE#%y9 zZeX`bT5w!|@#zF}TyF7Yjnlb-5h%$qd-(7wps#Qm1}Y<_YY5L8lEWKV3uSC70c_Dl 
zz|o`4dBfdK4m^AaQGFYHpiUJO;`V5FRq>NS0Ti`f=R-+`!}53}0$)b%hG+<1?kPU& zI44+iAt&zo`RZZjFv-Rk_vBv;;ze%q&uP!vYBHMM=h}3J`TKtO-47H()E)8g-`q3$ z7TQVmnZwI%P`@^H5gCTUHRM>E8D{UBIBOP-QFJ18;d=tEPBKY&wb(;@M0DVp2TIxq zzCww}f`FwiKqz;>fBg9h93v{>xNsQJqMiU3O~^a8mXdEP}5J zhL%voCZLB58=hLTDpuQr30`rErJUOyAKkLaMRb^fEneGDs}`%Y-q5efJ@H3d%S zH;K6>YtVS>7f;8oSIn_-AkaTDflZ0 zc>vgCI1cU-%ANBiRZO>Fs=;JE_dHNReVy~|$dBZ24;P-MW{HCY?$2Oa^j<8!+i&6j0^JR8~NRQ+cfc&Go(Dv1=YgE$^^z_cUmj`npGEL*`Qi;(XL zF68~93%~+S74=P$NrY=#=6?j;yQ-?uRl zxEq3qN{SC5Be1bIPyy@F0q?9r>imW_5cTE%+{>l~h##KA^bK>P1 z!H9@t&!b7q{%r-JN*uRN*@g^ImnM$7JD4=hL3HXduWU;!OdllfBOMdL%%F52EL_*l zTF@RmQd=nDj?>F8Q3vxN}~c! zP~Yv|O?nV_9zW4j(xfNu*N zAs9Z+UBZt+bYL}w=i->!RMIpggnAz`=mSLdL2|04!W+te?~9dw_4QCe)~g#Z0n(A{ zlSc;ss*9yNz+jmO39g~jAS<9%f zvCb|KS)d_d;pXAHac?>;;D$l~di7Po54%nrEx@Sovtn22WFaS`S|itNY^G@Uy7+l_ zWa~J<88LBFw|PrC*{n(4>BC}~V1CbXd8}?#wHN=b$okt_^s2YgW(7R7ijKzm&ebi+ zROsPC;JzNTv6c0|La02%IyKht^aqL#e!wqTjLm%osRKUDF03W*1!z$UJ0dVWb)~_4 z!hFkOxW5B)hn&}POkX4nLlsV?VRL9oJflhrYp^WbJx`j9ao<5>tG9Sj676m@8>GQs z{SXm}MNb*DR}T-5F`#gFGD!P~0*sRZkyt*fud^@V|ECVPliG26c2o&kFVqjAzrl^$ zjoJJ+!`Me@c)2=c70Xm9O-0+y1MZ>y&UpC$k5tqbsIeU6GaCLdhr(l}?cw|GwD`#; z_PKVaFzBssxTEb|)da*nKz;>C@xYuQ5z2$PA-Zdi+Vp>$zhmO>x1K5)#nSv`(*IV( z2|{bo`ZQRU{)2^->>5MK=>8_7d7}|(=qUvdocResKo_>+a^gy3)iU{|55#Mk@$#ys zL#AOqKsE~~*3EK+br;y))C{4fJ42)Tf2NY|4OL=h1-}jK2sTuqF;BlB#zjDaT7-3* z;}&;|fkY7mDmQAoZ6N^o)Hg5sIo)YZ|HCE(zHhSrVZ^?QZ*n>jFXSpDfgY(`mnb`| zk8e`k1TtHd5+=(XB==Hsm_ayNPqmo7?>%3{8U(Wmykfu(A*7M~o{!}0-(rLZC)L>A zMX~`A!U(I4S&Xp04nOUJ+;;Q5oy!(^s#zPtF-+s2?HZtd0cG9#D6BQu47)*eBR zR8aVw$tY(kvja1Lf{Tz(%v=n1Mrh#%%0~N6#W~BA%Q^x7V&Skf+1S_O5oWqF(O_qD zw0aLu1Jqj1IIy#00XB~RO4Utp!ig>G7rwgqCaoS&#QIgbaiQ zE?ymFCp+?M28_OFhWLT69HQYgh!g? 
zO)IwZ)z*hoy$SV}@|H-~6{aLvDFy2T#EbEeyS^l@AjC)lT#^=UhxIn6pr-xA(36fP zh7!PkZTt^GG!!Gx4J+7L21wl5hk@^*hwctSGLHv1!B{D zIhXzMU09-cLvW(*a639Gwn5di&~Di25xYc9Bv`rye}GR418QZLftA{spl)zoMsc@@Vf#1eMCKQOOKb-^{<7Hs9N;?v?Ei;*Vtu0v-6(B^ z!P0!rSQ&~qpc+FYrpc6Lqx;q^i2^sF;lwe(hw4ZTHEqX7ogjRzZ?7m+XjI_or9@~B z>H75h6T(k@lTOjSby#O_Kt!T&v0>KRU>zIq7Pfd4cdbe1P-(bW0Y1GVPS<@*zISZE|| zM_^lyt~cA&DM+xiJrKcnJVV1P#ycj|zx2(_;fYT6*wDqFMX zyK(XGNogZ$+ejg?9p8~4*#f2Bupb*W*+NI6KnQ&x%-0bgDRiUPzTXaED;&QDPyzg# z4l9$UQh|-U*hA173=P?ZdidF`KlE++w@Z$-xulK~-hN_PB$Ntsrbs z>mx-=#_HT^W9gfUwzY@k-Hw&N?mA8Lh}I~1Br#0~4{k2?OGU=EjSr8_2@Xu)V4$(T z3A+fLJOE-CweA?TW46-3T4v4$B910FXXMHSM(R0G0DBy&9B81yQ#U=dZdYT84-ht= zAn8b`vpTwX%cdhSYf?0@yUsIlxbwZ(d2H{)agJBg>K)w^%FG;_Rz#2`1{NwD5|_mt z#30IJF5%o)5!kVUthxgG6;N=H?C=4xtc$79N3F+`DTiFObGOnW*4G5!ow33_GUg6w z27Ytr>B*8hic0N2BhJgwe{>xw()cmCUwJQ3+WN8k1wF`Q9t^)uVw&I;CLR12jB?tB zek*m`L!ffr2P-W=LvUEIiH4*JO?3@Z_gpH$LQ4TZAM^m0k~MeKt>OMr=wlncW^5UH zDlKV|?}v$lsz3mP+uoQP*NMq~=9i}+AKx07PGr!BU;QB{poi#HCcPvLwD-&LQ)4S% zq9p4=IhrOH{t!)Dh8xPqv1L#kalB0V*u~h|piA{UV11BN5oFE~k~4*g2vGj0!{(~A z6mdyNK0}#BdqW)un;NpWBY`G{fILmT5D34(o4ugDk#}BHG8HLS=iY3|pfvsSqtM!J zv}PGTuv&hfF#QZ+wFo*Ha-bD8Um1+lOKF7!!@+P|vWKp-Gf8+o(BNLOq2LKBmYIC1 z>;jV;RFJc+6#tFyGkB;f7 z&Pr6)VaStnUYq4b?jIVCNsXZR&*D#Cu_-9b(?0Ow&w6FmXf7y$gb=^VL|^9bP|7|O zbXbh`DxQ%rC{Aq`i4uvB&BWq7y?;oI7*j0G96Ce5v0M=Iv5ZC~^7Cp?bzwLQQKLYe z;YuQ;y~oB2`|z_@UxFC&UARmG#Ot33`q?NfpMC!r&Oc%QRm%Q$kp4Gr{vu*!Dy4rC z#6LB{KasLO1FD@F+Tr-cTW;J?sc!36@!L>;!}Q8Pavttx0V2q(xwEE#S-Nlz66@=s zrLVIan9Sez5REMf+J~V;P{cNHFzF-N_1$$)#1qVtgjh2#IH9KNK1o%~Y_10K(NFnJ zN1c*23$6cl6icvpG@ZlkPtW=(;frpDk^uOE@Vv<3^LVH8nTg5Kq$=z}RuT8?%97ml zF4&E1PEBfXi;1NoLqupaefrgG6sCr?T0o?m3~rkxod(BhyT&wFz=GQ`@a@vlAiYbW zP2dCuv=r`3A@rn9@vvP@^nY@UQ&kqZawQ>mr4c>kqI=0LkTw%uw|cf+{K9)QZ^w!` zD0BdW*~$L36e*7}wn8F7tBn#xIZA?|Up*PFAbu-~F&446&QGxdLll1+RnGd@f{%!3 zlsF_)tcF699=@oDzx72>PM&Zpx|pHOpMoEuu2J5Ksi2_iTWPeW`e~?=osMtabcy7^ zpebsld@?|t7dZ`@gXzp;gKEsOJQxdyGN$sIE*$?jfG)$yStiL`gUL=X5>#w<9sr9V 
z({kQNQKqfsvyF`Ny0j>DpTfwsj+0b%6=_8JbKcV+EmSW>ZZSO+4&Nd}YnzMK@{SVH z55rfBcr;B)%o@GGkd7$8myF8L16@A^AN8c(I8B~1Sl-3oXDOUXUot|!9$F@?W4cYm@Y>Q?bexY z%j@oGfy~>o5@Z=fD;tnI2Ccq3Hgd=HUd|qkaH?k!Sr6Bb^ z?14&Zd8GxGq%(|n{C4Wph+AEpl~Sy9k{`ZsSvn%nBd6D!bQvHlM_F~TGbTdGs$PXE zhXY02Mk;#COzm^C#Ctn(lp{Cp$=y<5H2{n^T<%DesQD$y*h=|i_=sYaYYi~4T+!3y z`tN@4OhfHK19);z$1uNpBw#4bMUtYyQK}y?Xf*BZfWP*Sw()6}nrLDE(pn z4yaTg8Wn({QKh_IlsLhQd(v1^PTlPf_^Mq=7!OW0*&k@L)dD};9~_PgJ&4tLviU4Q z+gPM76?9#Eb&a-hhqPc2^>r{XI z%C3(BC4pnw>m+U$6*Ve$G%B|CP3~096TzCnpq?{iCpXi%0|LOXT%U@67hRvTfyiXJ zVS>WS`dD-FKRmy!2TTO_zR}gOMtQFPE}M+v5HDiOhYuhSxSD?sgBnWDAmGBMbAqv8 zPzM=|y)D^-^6nO0YQnXpJ`~}j6Z2RA(@hk%IUKqiDQ|$G9bnvbVbY9~6}d(HJC!Z| zGt=S_%h^sT-Ujwu@zX2Bt20OXx3~Cj0Z7iq%c_-^+^tUiq8`IxObx3|Hl@i#(4*Qc zok~G8{Q+q**LDV`p?@1HToR;ScjE)*Or-*q9f#l)gD@O}z~~jBZgz192iZd6AsJdp z@y)U015}-Ix_PAIYSkUbI~){xZD?~NRruOwMh?EX!f2C#R^%MD{j4i*3abPoGqr}b z3%gjHeG`Sp-leLBBgGDL4+McMTBa>jx5MFY>E!>|91}a2bZ`8|WLEYC(lQ?(uYLFE zs*TXO`YG=|G-&pRq(a8D63TH*fj*^ZFRxEGk&jo65AzEY?1x9#NYheRAg$27?&5sx*v=*$?2jGLiiO5MTz`SYnn zw6*MD@&;Z_S`67>4dQ&;qw)8LyS{m#bzNgB3eJBC*6e8U(K<6kC?~10xYD&fT!XaS z(63S`4^8^DmKXlopud%@HIU(5k$%0_e76-5zRFqi)+zS8F>{OUHmfjV%L;lOpVaFPnha?|859^Oz&quvsN!ms&UXuz^Gs-)zt$qvRRKVgDSY!k0 znVVV4We&8#SkZH2M#MEe*N;2M%uAp=R=BEOZvxZiE_T1+!`$PjPT{pAJlTfxt3Weh zg?$TC;Mi9+xPP|b#Yw`4n;Y!p?V#5Pq8p-epYFKsgFBvrG;1?OY|F+F<0>=H0JEa5 z^2rNbCiTYV#BanaoS0!P{)G4J`+!(oN^mLg?zt8-_KDpYlwS9Bm!w z27Xew?3Re41GAGjFI)(rvJ?pr7?nyk8J*BraPbu8^Mc$*{sYnS4Crlc#8g8GkhmNw|^Y z0a_IN0{C(i=%q2|JdU^X%ZLzfHm4M6XT$U`J%(IvuX7qfzQ|1;7jipY6*7gC%8^@prWVGVLGLk)j5vX7M4*~e z7&U?@`TrIvULUZwCPeA{;BBi*b! 
zBk*)2OGn@LsvOByE96XGK$)Zws&P3{0 zhZ#w~S9wO-L8ISl51hyS41W4oI2-@yiO6_bgT})!5e^?qyE@}?9;kR;(!W%fSIr!Z z2Hx*%dKPQ?egRNTi(pTRASZE4rN(;_z?Ob05SUNoG({3o&-^wGeh&&ne*uD&*4g@i zFJ99*H6cE`1|Xhr(?Wy}85!wyV}DW-ZBDp!?8#;>8U^L9`s{sNYFr34a@Sp(oQZpU zlL>g~Q74vG0E;0xL2mP3U^K^}hV3iw_|OdofdRwQKmra2{6*^o!VuYI?!f$7A3+mRmJf(Gps~hv?k{?MAXD*4bv81tZM^cdwPxs7@vPm}CnK zg`DJ#22D7Bp?@tKZjCBcgYxvHMACmRol}8&40>l9tChh{EHw*Gc@h`}Er`nZc*RsX zT{K|%htdw8tEc9gLmU~P{R?1i z9|NSLyymLc6-_CIW|8pkF>-73(ZVdADuiXZueufe{sSP4>*jTreHvlOtDY0YTU*|Q z%IOeYQQ|#E9*Nl}t3!5{Apa+Ciq`%0n4031yIEYw{4XJ#QB9Ipo*LYK16KMr!- z>NFvB3vZ@j%NbwHDKIiqy*b)Wjnp1{O|ytNyICuV?7@NObK)LagGU&}0yc{)hY;fy zf~LLYq*_Yp|5ncVvNp-Nh5nw$Ja!IbeSEQOLw1VyuZJiqmL~yh`2IRr3~)0rZ=3oLraLIoKJPN;$V{to1vhxA_-HVRuEchPf-UfCw(G^1`+nkz#!=<$s-3CL% zqMLb!^qm`d?C*dZLB@q4=CvSH3763@%a~V-<>&^21ejQQt!VhH!}$S3+aT@iM%^h9 zWLz=#Ed*{l+I2a-9+f6tNPZpeb%~?9r_TZcu30? zceL=ubIlTs#gMr;E4sKlf_~L)AcHhDlv=t!2jCjm1TCV}O8ZgfUp^HntI)dTvK^ew zkWIISuU^_fLoJnm%4x>{K*id#uGcq-@W|LIA3;RaX2ucEvUfZsj~)4^G{l5&Zf1o^Xi4~_9S1V`GzDsTM?nuxtJhCJlh6LZVd z)ja(DT~>%u)ntb8_*?*8kxmlp|F%rkUORrIHb{Fjf%y@nqH1dwxI5B(aPXHml%I9i z{r|`b(R7c!(A`pl_kjSEBop;x14QO(BS*8vT)?HP|F;=>i= z2m(1O8p&I}gI0?e{Ai3&BA5nU0uvVw992jTX0!-P>&6nE>}}6S1gp(cOLY#SuAhtN zr`lqO_+kPD6e2jvB9HH+4&f1+&81;=>bnmB$zYZLOhf zA&FD^J_tG7IbCC_HU%c%NHOMKwEzt_wrQ5R4{63gCjVR4Ll1z2+It9OEez(yPzwkn zl{AAsarhSPguSsvUTDR?ClVl6L(6n3>BT={sC)gjg;yQT)lK?w0{vRjq3$=pbJeMp zHUhUaHrs3hZ&(){F4B#ep{I&UntB(o(K%QDTUXk1FBxSMYtWm}sTiz;nO+8@%--FW zP5WcC!f{H@Tnw z^0Cn2%JREhs~M2Oet4%C2a0vKUP=0Y1S>VM<;^t@$id9%p7O>xVT3tzWMbUBm4{-% z;&}^t(d&8NX0g*ef3z^F#NK2I^8)oKiTG_*`(a!gdR0R>u68_aZiy~9+g<`*Lr9ot z@TYbHw_opMLJzejeJZ~N!Zcx(1ACM?lU4irPo_1dsD7UfFuB!Fc5&mq1Jz&Z7JDH6 ztP!swcJx!VX;>YDfPhsqE(1MmEr{JX9rZ_S;(ucrdI_HIFGdU~I1B|Hdafr^(RNxvq6*0#U=eps))D)>@}?#aZSH zFpq#;2VgjWQ%)KsINm8tovcb&yY|pS3FM|S615sZtkY{=OyT2qFF2lIV?`q&Do>Zi zT*8TU46c*2l^GK8DE1#d`h^c-Dch1OyO@OrvE=*wNcygOGuy+cEnLPu%M6YWkD%?_ zQ^?TAsni;Rt-FFb!|O7-#`Cjcz51*mNEk>K)(;p%$#sI_Q7 
zH;-%F!o_5yG>KfpykO`AZppYJ?)rZq+#!1ax%=x9=FT1la9^-X`IEM?)4?_$E&bOJ zV4Vg&7CoYJRs-9^PAYd63wxEU(J;VFW1{Q+(4`h+S~0`T&Nsx-6K9f7aQTEJor7VN zAW75n5MLPnFiP1t6^ZD`)vDuGw9O5PbxP*=5f$qah%WQoAB`SQauRk@m9PhVz9;(mBVoV@P zU@tlBx;{&3m%cl};WcN5O|(b^1_pg+gn7>SWAlhPM#XJki-Ck2YMj$6H{95HhJp9V zmFQq38gI5R;09w_EC(s0V6oGaF(YI+oa05CKJd1^y5beJEhTHZcpCJNVXD@?;a&** z1!2E8w4%b(=CYhQCrz=EjpPW{st8fdy2i9NSL=N8@r>tSPjg!9rJ@d?-!Y$ERt8>*I?u534)eR;YuVv#}*6E$T+Vyrn9MGJ*zrN-D+H8 zA0wCUihweyAHw|vm<(EiBwzjz7b4!S=slhF^wOV+;=5MQQG2 zOxEepa>4>in}pNrL!J%Pv_a(crUSoBO3Vk`QTyykGtSqq<6+!nrd5Z-=8^FtAYRcX zk4xCUB}I!&~UvkSM?v>3XcYk~AZ&SdtD`yRs@-^SkdrTQ+2LgY$x;_h^< za!&Wz@+vP1Kkkn3z(oI17pfH;JdF)xC#fyA7dQQeJC&4$V(`1^4M}`%$!huK_FtP&DY|0h~ zjv{AWySC}BO^`h%k2dVA0T{OV%wUR8j(WJHQ-s-!6FAFOPirp}p1(}pPSp_djkR!! zr$;(`~>IEI4?dX&~i5LnbwA* zB+)cG3T3rp(Dd4*^Vv$`i22g_cB7>WEFDXJ6V7|Qc)Sh3)$)aoGST{=Q=p~S?0--q zm|wBP{ro~Q`X8^XKE6(KFB4mRH9Cr}Yz?<sYz&87)-c*1ej@u$FkjjaQ6QdndFuKtnm7zH%`Mrk(+6E2<&g*b@@E)%uvI36H zA6K>>JHW~-B%>$0&P>%w&MxU38W`KgBG_e>`a7i*{C80qi@wl+Gek`*=x89!5eZbqIMa+*)R9`+K8MqwQ@y_(z?H?D=a`w3y zqj+jPZM}y#kvGyMD}#n~w3A_#BzEGR+#ROBn9DiwBx4K{7jb&gK7}5njsX?Q+ZDW5 zPUDNg4^N&l!l18;;{b=hi5(n}-mAzM0mbO(WsD_z!v^qbtVe|LJmkekLgOVh>64*@ zlqFM0uZRpjFvh1Cig?hfaX?eLd_~%+d#VQX!!L>`31Uq_Rek$cjpdu0@5U($dU=(Hn(dd0;vmBmxJ1?Dy2C+d72(u^C!Ga*% zM!%T0E$2=A{}*>qt%Gfn$`{W;Y|h*P+~_gQn?1ekv7J8)s!|;)F_u_)^EFn!1A(RzGc+b!(v0k{@ z*;#2At&C1n5DY+O{2fo8VAH30UrKPd$Dbh=wj{!LY4zB0flgKgxmC$)vOTx{Lh@vL zDBNySKBJse>iqx|cDc+SoZMo-I0z`(7S C9S?e+21gD+{9)V2C`6@VQJE7<*t|Q z;Tnaz91OZ?1{{kfd0riX9@UvuL`OaFZFvVfMzWhT0@@QR*p!t%>Tt#(^gxtE_>CVZ z75|mPw-_lS`J%{3-}=rC9bx#ox=&aI-)We>BX#b$TX~h4A)}720DJd7WP|vhyQt7} zTAW+|Z()BA4z9jQxT-)e{`C^5NnYr>(}wR6$t$(%+KbJ8{Y@C*PCge6JJ8J2KZa z<$C8?s7W%x6bkt3diskuw8yui7cLdP9*s640U22d?eHDXZoGS^J?cZd&zKVpsAd=K zz+uIlm&+qqsXFJXlK3z+Gk67h*9Gc9aCrgs>u6R!E4i07wn?k%qM{g!DVXZmV~G*9rfj~y~(-kvujFwXyGW;aDCc&|bg_WmW4TeXRHgd6RbRT$zgfHM4P>X(7V7uJRA+H^ zhL#1MApMO-$xTsEnZyAR-e#udAOPH137Y$OjS9uE10w_(`cuS5?>y-i0(4Ubm>s3C 
zCrdUGa~s66H9wDqQ$ktcuyFJ69xl1Pw{^VInkSrE|1DU((%s}!2>5BL@K=j&;lel? zHRDC$ki3Nb|M1F2aSWtE-R9+w#zD&}?Q)>mWT5o$cXbUQnC|<90;XM#km4^Ax_;UT zVe+PH+QY!H1@(oq>S z&A6H-JZVs*1XG?QjB~GpUo0XK|9)HMv@az>DUiIq1}7bV3#9i!EKPp7Kk`wWD*kvJ zV*M&1Mtd2`nc21lUX8cqZ0{O`i6ej*%Yy+3`TYbhU5|vV4m9AD(gSsELkcZU8URk_ zKx6Xnwor@sT&BcamGQpBj@gebn&bw>BWyOp2djQov6jEcvJPQxyd@omH+hE1KnOD#xpylbE`j8XN_IWca(tAL;-5kB4Kl8cs z=F;%qZOaAPa#TMI%r1R+V{Z8*Fh&%)E%l;_0ff{=W&tb)mN7>N(ThAw5vpH)f;P61 zGEP)=5?i$Z0bi_Cf^=brk2U3-YVrLy<$QW?$mQt1Ma@zUnrIqj>X7cbpRCHG;1N~N z)T5A|Y1-1G{;*+&T{J_rw6a#P0+^O$L}rMajS{!SG|WJH;HH?BZHGy$&(Yi9#nnz$ zM}qt<5oHY!61ZKN(H_Is80HnbXkCGktcNIW0_M4_h?vOYC^F$!QuF3)g~RseXDTM5>^%)nl!6lxjN;)P0cr~Mh{3y zXaB18DaOHMhQgY*@o^F|&!}wcxV`LLs}yw&C-LN#UG0jnU^buZ zRN%%T3r%JtRhdA-{;P?4qTlX4>UYJQf*S%dP7Ovx7J1!bmrq!4(la{LVyGhs|&+T z63|BZSn>OK2g=U&>7?Tt(6n8@I!#5%Tq*Q!*26G$SM7)f)bV{3gZ695x+f`4U&ESqf=l6f+1(?3n|HDUmyG>qOkPiBWx%7>t-a1_ z+M?NM3@j&%u>+fZFAxc++|=>E#1*$;#GKsUx6%uX{XJd&_LR81=#>wjKs zG|&sbRR`RxKrh(824vH21gU>2f{G0+c&uPK z1lCWU1%7-mM27wH*9+SN&K_a+JU18Zb%DjoaY{LlG-{#4#~RoyE$PPf!YX65+nUa;$R2AGXpN7SbbseZO_CLt<3$M+w+6i zb=_|T7YBuqC)s)oj)_HArsw$96OMG4nOz8rd8I}-EMh3-gZRsrdaF8 zbS}M!Tv0ETi+Er~Uun`;N^%my=#NsqUj~8!fJ+@}(<(oJyc#2LPKf#Qn_DgphR?}W z{Qq^Jg8EYg1|bFP3RvS3(g_^IT5k-E2t52uQ%i~+D!H!o09qVnhA%+JcOdzQz8&go z%07l-p6y#)d*rU`zkhaeEZ= zjFjwO`Qe;H*piE~^JFWhDxfn=auP7$TEl9M5^VYFAklYSf@wB4)>1Xki_(!s{Tal_Em`xJawpj!Z zGsM!HQNV-bEke6s}kJ$!%`nwnWb7%xw(n^8z8hw!BdXJk_Kk_5=ZEHWp(wX3{6RL&HXig3qsZzP@J#S2a?<1nOV6$Ko3W zgv=Qv_s~3zE2u6<*j(22)-mp?AA#3;y^?-6XBT3JLiroFgBmU+BjofB?5_#PUa|^r z!RjTPk|EL5kVL{XkROY&_@pAjQ!PMxvL%oX*+5(i1W3=X|FyVcw=Rtb%rXIXKVY$w zo(Kg7Rphg@Yt<8|w^&eMhAx$(pm~S+8=G&{S^&|;d>j{JkAw00haf(W^zpVDyXBpU z8OTlfD?UVwp|KlP!MK?|*e$TWCRFEoR`oEy8yV9xMSuGLB zVWV#CovXrmq+h;|j|elH0-dTj3HpS2>JIA_Yzvr?a#IsR5D~s>HaT}`?zeXKBXxD||~99L7& zc|Uy?I2^$uLG;tJm%B4<_9-N{6)#W_cGL#v#X6AP!K66;ycAd_0`F&3n?bPwTgc6C7l8u4B=%+C2wn2aZ4<{?EQKRHL*>`ShZCLM$_q=c zSj!`HmuDx$2J?-x6_vdr#XaNq&P<{_AZ}``TKDHvhAAsD^2kQCLpit1v_%b&(z4J7 
z`8>+sbx$hX|1*<*IXLEx$EfX1^96g? zOBjx>2d-nEKybVgA}W%88leJmEEHX3TonJ;rMtVOr5i*Lq#Nl4SwUKA2bNgSHR$ec zX$eUsb?um<{1W7e>+Q>YzQtn#O-r576+7Y`ZsS%i?T946#FV3_77^1 z`P`-0mhEA++2>~Goed)0smH-A-_qZY!UcG6TJ+mGHecZ;o0Bmm;DALfcXe+6NDrB- zJ@);4kE1T}ox(K>x*!cxi?Zg?8U_JAts+yLx^JFQWQEJEkF-w%xE=I7IylN%Lxnle zY*&|cf(?+DR7^8poZx& zec;`Uq~48ZNlUcD{6!9Z@uicE{(w_zmr_=EM?Z5^p0+sigdfhtXyxeSJx8cCi{S3) z+fnF`{v{im$*%Or<&4hpi5i~Y;yr5&|E9nd1#bmaniC_J8%U#>@TfP4{>irstNz?B5`? z@zUXK@iMO{{a*Oa8=)lK_>SLRtD0NPq@Ja)`l8UtsppaHpy*Aa+QgbkTTE>a|G&IQ zSb@8n&*1Ak^OFkM#JTfHLW9T?_VBCYY%^81 z52S*l5(F=_u)87UO*H(+k(Jb+@@&heGyF5XhV_{e3U;{%GvBCD6bGEWZOz1~lwvCf29q%-w{(Y`eQl$xoGOhQwd*!#wJ-Vs0un#1++M?>1X)`MroATMcDdDD5j4JmLgVyJ)l(S0_&j;n# zmRha3bQs5QVdo9E9^YuFrFE$@vl15;klNDy7K)I9fj+_}vrsJ| zk=J!6&5|1K&rZyxdQr^ljKPb%+5WvpliYzfr7zh&%%{Bh@+nJ5s1WO?Q&Sk0WR~Yz z$5J)lIPhl{W#R2iV8G1v*K;#Wsf2y+zGcp&D(8CA3v>L#rvPD-k>Fecq7nC=zWBtX zfcr@DdaEsxd8$z@Xz+(#u%P>vr-!feH2|RDvgybW-sOzd3vChYOL%KK<4qGYZ;rWF zqVWC!e$^gj8tHxa{#5f2JIcdhW~6LeJxOWPde|!H*{P~>nu$DFC7qS_=U}Jy>0Vzy z$-x-Q>aEq=Q>KX^ho@g&52@#!ac&DiOSO0YRB>>7^H%xEz~~(f;neFJ#@uv*G5J4l zLR74Ch1#V^oTY=g{csna*3<~ND|Mb~QpN-=eCiqs)U5xJ&#O5j1?kLEUkO!OmB~@8 zQ(ap#y-{LkwQ`uU=%#+7C-cj4U%}>rh&xjHQ*rgZH`(HEpM8yf=IPp-yhHf&746Cb z<=>bph#mRBJeu!w1u8Tu#XEui^X7m06CT6@709b|r_u1dU;GW%M4-`MY+2V*yMrfe zKS^O}MjX*uNIO7q%1VGv?e`C(&k*UN9KIXM9(gh|@{bs zfDzDa?|gW$^YkL7cy{)xt;5wc+o`<)&07Lol08B$?Hs???WgL*mW_35Xd%|Y`s(5T zOWb`@kN#hgvhw*wC}hIj-a`z~CA0jk&N-Et^z-_(4=`Py!*Vy}5gIKLI$(RftOTs48##dh_&> zbP#X+;tgY_n_gqCQufEl>^1k9(8hcB+r^66a++=IZ?J#6gUNs0uvljDzW}ZXg_5R; z8KV|Ml+(7ZSMpDuIStBpOd2qt`kthziQ3z{H6Lrwp>ek(&&~zkUE$E8kF0q06Dc#& zC9A^H86qHU-AA+Ol{Qn8pcFCUcHx%hJeGg-Lh{Cu^tX(Zesl4S*Z&Y(&+k1 ziZ$bU}FJ=*}?pO^1gYKA|#ePSmGik4JUjuSzn$ z?h^Upyl<0IZ5P(A1P2$^)z2&@;P>434Lcn88=rK{yv&RLMVA=i<79F3zB>HbPnth* zHzr-KRzQt&oZh|xn?5!j;DF${OVWYeEo?u2T%|tRUb2Z@B`}WT% z3M2FjuV2MSQnOkyr;Q-GJBE-}3UAUREZ@0ir*n}+89eQCVuc#ELRs~B-j7YoKGGkX z6Ehnqg1CEq4yar{foT!%e?$xGwmm3#_*e99_GUm-o`=brc=FtzYkHp#d7}Sp<`+{r 
zW~s9o-xe3K6i*vSI_qZeIIiODK3_J8V!6KdF;bf#k*0Y5Rn-|*E?So4O?!}i8|1y~ z*ZJO`O)oL>;{t9>J7Ci0MXRZf2u(<&+9HGB)ORj?s;>U)MW;!>uW!(`&&fUAU!joD z-ODwJ&3FE1@9u2Fpnc7@DZHY357zV7bw6Xpdksb7c0V~)Mx}qL3_P3`H|Izcj2Gqg zEcQ|;Y6-i~Cf>;Lx&w2KkMRokoP2TBzRJI>DdUogN_vGekI;XUcON_wzcYx5ti46^ zt=7#4BKQ=c-L$uTE?@mrT156z8S=Q>Zb?@1=j@`@+2YMd5O0jfrF#q)3cvdM0+pOz z1_kc)uzCC?*=t@-K}@5e-G&zDx3IA3LjTi^K0cV6Eu^1bNYk<5Q;M->Yqj~UdKsP0 zmkihM*g*thIu!BYw2^i}w;x5vtZ3|T*PWFPZ>7iP@)(e&H~Lc4nVCIgjXLOn-|;0o zD3%n(bOX`Jj|#ei&9bNREM*G92-|egwQcTfcG3LCCqe#BPv3Yp0oKCuPqqKMpR(@M zt1B+O@xt$a*X0r{T*OT8$NxPa-oMPhG+?|l$}*qgl75djA+#I4_}~BLs@uPRcaL#= z=W3I_A+Gek#VfzQX2Xcv5cNNGc7IOEhSMedvjcp8IF-qUTuk)j_G11;U~-LoRQWp^ z;o7m)&JSX%3TYRbh3)?js8n> zm#-@=A3me~FBVO8te04*Z~KqMD6~m#*K~9|$ZDMaF@Hdsao5lOwh<;K8Ja7oVZJ5G zpC%6Tv-znpNBfTz-x}|R?schoWl^~@zuJ*~7qCxr-}vL7wD}*BvIwJp(+yJd4w1ud z{Q6Zj`kY?%~ z3r>q;hDD?QChu6!j2noNz0%fxl|uwJEe|;QUsyp^9sjAz88Moqy}I^*_|JeOT*le1 zqyA+ty5_zgm|f=-HGBM&FTVZJ!mnDsU>Zh5g|QCupl&v!{nxgNtE5z~GRY*`@zGS1 zlsVPE6g8WQ3+d-_ZqK^uqs7UfJ0(-@hAb}^Yo=)$mrAN1T(CQC)%S2FyD9i`Gv831 zguhg@e>%FUb+FIe-&IzwJa3C}ALY#4AC#G5p2vQ@@L2%n6P|3HlwnK$WUGnJhzL&v zvVOh)$!~G9yh-bk+_PwM*mT7o;=8VLrz^2WKD-+o5tR+Ui!f4C2w75co+4# zOGB5!)vkSN*3o&_JDv11yZPqqFHPaqV%m?T$vO1y7ACiovaB0#3*;haPK+nVFA`Hf zm&HqId*`jh2)>zq%RoFUNgfCqriU#JeR}5N<_%|QNdM#r$-8mP+*8o1-B)?tRn!Wo z;w8-lq~y$yYjw7kWcs~kCaC|VAm1Poz3%_Jmvty=)cQ#+GfNt~(0G?#z)CE4jdJ!1R2J6&xB z3O)&y9lf_wrMbk+i1{ZFmmsuYE4|EwcDt6#@t;Afmb+WhZoI`*LB(hNrEhW)NCj^vSzd(RC*wkd&4$%5< zHzwS5xcz9_cHg>db|5=D^>xj8mPDy?a684NaAkf?H|Wi>0`Y6FaF2!4=c|(Yoe%|z z?38vD>Fj?&?QPt0vSy*Lw3!O6Uezo?cp)D7!XhNOZ@;-my%HH*4Hw7JQWC(!pXo~= zI4S$9{)V(Qv$Ws9dc03oT>Upc& z^R@nwwAQyjigu@IfyUJdF;^-h{`|;4b&;3gyUmy{Nt4Fwm%_lFE2SM9xNb1+M(t_j z`a0A%RPfqo_|C56cfnM4gPf<KMLg?hPv#e5(g z?)&WY7GU4Ht%wFu)Ms$ zvrkK26{bQ)agOzDWACf)&uS~*wc^#)y2Ue@Q0{j7YsJ~2C9nU|Ps}KvXW4*%;j$Jh zr=}>m5S!K$No8b=@IIY@<4jsA7#~zW>8Kt+Pf0x5s+;UEWvAvLeKA%dGnZuk`lJaP z^Z&f_zjHrxjV=8t9z(r{)pxTSw%(>ca+^8aOwm*Qc6+(q;&J?>E7kB-*Z^LEs=RzO zx0`sw_o&)7p6b8TG!^5=Nrillf6c 
z;f-aeePDJWNzM6xMwJO7L2@4$jH+OvRJ+FEyA!-?BQM;%zmL}m2lwYs|LM)hq`q-m z={|ePBUT?JqdWJ)i67e2DQ#DV%b#U>5r&f#X6Z7FEk7NKRF0U`57Au+U>^TM(Eay$ zVUaHQt#!a%y9M~q;mD5NEri@4LW;|CgGB#DQTEV%#@B~gmS{_BtZKEwtHC9-8l-Q_ z%pd0HA0x$FZV%9p^%`u4#3>eJWVu#&G3 z4EG+r5d&DNV-p>rWV4rs%w@9CfP76wDEH5e;cU3}I$c>0hnTLr+0kA*evCI{N)XH^ z#-1DmmsAbL)(SN2(}3a8X^n;F!z|306U_NR)0D?` zOA~ucbvh1nZL~z3unO@N&~`kAR?d0Tm4Bn5zx&nHMCwTbkS=3$Oi+@4|Flo$aoGY) zB6*VNZ3fDj^W-fy2Z}dgC(+1Qyd)e==ikG4xCx5~ zl+EHEh6?6I1$;1Xa7kLXZZ2CCld zZ`nu^7Q54^)E{&wD9Q0@lnsO*<2hFUjU<{#6NM9#20N*lHc;7FvQgj7OJ72{f9@vK z=DU1CU69{ILxS;1Z?b*FId7S*j4}BliIw)$VHm81X8; zh7ED0ZIh@VM091bfGGGoryH)mr6>ZvmY7FVGDL+bkPkwjVgOqn-i}Y5oZqsyKQwn&DtWU|qF>j~Jv+oWOWm48jxdC4gP?(W65!NWk_gd<5aDPVAdg}llf5=CkM z9g79+D~F)UzM+#dPCEbkUcUuzJ_+o^?h4CTv$ zYK9(0i{^=PP*!dI#$g6;lQsp&KD>-$Y8rIRcML~SSN2zzjzuAeCD{C~bqzt`eZ^sG z$XkKoL7S!lhd18`QUIia%;r?>OZqshtF%0S_G`!ffUkrM*_lB*l|1*^P%16$iUkxj z{7W6)p>OnJX~B+0^s^ov2kU1DdHJBYjXYE+a;Qb0JBQ&%ds>yuVg~w@i zcH!&a{#b^!bG4NNywnY@!>AcndWHi2gnm^TcbXklv71s0Vq8)jGO*YaIh|3YF!2{Q zzKHWB{(k6J^t_zg`Z2$Q{L@Us)~871uGDp6#Rje7fv>6VP79Q{Z`zEh%wX85##@+M z)LS0~{+!tN@wH6O;xHma_sHhZ7gT(0PbnZ?)f;xarekdhl1w(L_E&ZNtu4Ts zCUH(;4&w>a{_3F0>+9gQg8pwz&ZZ%V(?tN;f)fPRTQI_kPlIR>bE6lUC9niiZC4?KC$w@f zAiHc}6<)_p!JTTy*@)k?^sd+pH*|?$nnp+ z^gj>r)93gCPStK@F@_&B20q{zLNz0Z%Aw<}@%}G1fXR2B2xTc66T%Jp)Nz z!O_rDruBn!ZjMer*$suq_^6b)MVNG{`ywh*@}Lg-RbU_86ah>|6f2B5*U0MDb+jPa zvz@PkS3#3)!Xabw7$zZ)>Oris1k~DG_yG;qVGWY|3o52X&SHsRK%3TpK_zF(X}~BY zO8vy6^bhMWv*>Wpn%W-wZ;W;QSX^lVP<9POM95eNgWG;o@CY&EOJtPc?k35T%b@Cv z2Hw-Yl?esQGLfID4x_Q00v}PSVfq9+s*LZeIx2dEt|SI_P&%h{6Ea~b1U#@f8iRSL ztK4de-qbUSuHL9r{Hm9!BL`A*N|8t=VRw5(B}(urv~GzJMRFtjG%=^X0)f^*YJhBF zRD-J(@X5xVbVJg71J}^Og(LV>VS6Y(j+Q+{W;q4GFL9N zun~pMKPbz=OKY{Fb@}Q@D1X^qB6ONM2e1F54xLbSiI-x$?7l@35wan))A1*QK&QqQ z$(4glUV-PJ2QM<-aLs%jSjQbnBF;T#j>y^}pHMsf5{;adPDguMvZL{BT#49!ER}d1 zB5EDrrDq*N9(_O&NmmB(zNA)Stu+5SObasyd{&^_TD561CUkc!jN?LN3<#o({2z~{ z>h%eROa~)t`It?}5S0!=DMxMm_)@tBp_}1DI@CY1H@a3uzyl4=$g+S2LUqwmRQ0k1 
z*05wIP)0H-`j3a?MO)(^KT*@t&_77C?jnZR}B-AA869%mw3Iu4_2d$>Qsu0)6d z>i9ZPsjGLBBt51>P(F0dKzii+qjiOM$BvB(RR~Asp6U2IQ3`}T&Z_7W+Uv{|hFR`S z2s9;Hz5O{On6bIDL2_slA;{M-GJwLBL9bvU)6E-0Eh@~wCC(J+#ja^|ZKQG$sDAY- z>CkrS157E0jd0IIcpIg`@o!ZsUjDI9_x`H$pxzRycwMC#Ne%tdguk_sLfs@6r=DtN z%ynUk`&)95hHx+F^?#T%royw>Ba!R)&#StR^pc?dUsWGN{m+gYpxjkAjX-JrfxBeO z-yaMq(vP0j<6-BK7nh$ZRG^1OZw`auXM~`wk36gniF5q>Xo9qD@13GNKDuH){e2X_m1?MuLj2>v#Xnj*WBwGhEFeYE6cniqCse9=#MRQL4z{HMKCUh%U`cODaFV@y+#(A8>{P!gp3I(9 zwT$_fP#z04SAoSs`8k7Qqa-Z5_8?9IzF)$skua2EHPnc!F_8R#>WLo!Ge4|^a?-BM z;v8+}qWe1Ef1w7%vUbfF+G3*e6byEKZ@`sySB*KE2@#WvZ%}1>HH3R$>YL*Mx@=Ip z$~C4IFfJh|nQ%`)XMa>!kq70TPQzR;0!XQ4L%~mcZ-CMiVWyl+sw%-ONYmbtxqsRw*eCwa}aSy`wBzrGcA$v&ETdp{) z68dV|Sq)^@_oLQYi9u3xF$?*HGl^UC<>jkUHxj++7r(5k(Ukd&q4EWiH0)W?m7Gkt zA1vTmiUZTTP121;jZI(NcB-Z>H@N#n1AFC0Dbo{=1T5XBOk_{u+n{|!?J^AvRHOlp zhww^+P{q+UViyhPvEF0-JK$JK$|l4|T8$9mJTnGUELQqq+geE`x|v-jvRW3VF*}we z=f6g^>K~)aotg`vmv?61-Iv96$f}5Ncu#@RW%9gPCs{AIZy$i|*xJ;&&^2uxvnLIG z5aOgPhg%}lyI*|Q6EfpprMqaIH$AeTnXM^U@IwKJe-3?*=-!|mLZ*N9!p_T34I=Rx zjl`Oy*B?=4b92D|?1c#gA}Z+Cd*GzAar0c_dJ9V!T6JH39HtwlL9imkzlscOL>yHz zhCC|SPK>A_N@^|OZArI8~D5{kM+egxU5Y1V**1DCFw3pqP^$-&tfamL>|z z{Vt)WQC$QK*E-uGSvV*G@k!U<5%9?@=*_rBEM=*l3sk!sfH)G=#j;@0)8YMnCVVow?^4EG@V z50v-x(q?jiJ)r zlnCt-ivHpD0DDsEChFl>5#dE?`l+5&tH_A1J+T;io4`E64vwGr8X%D1f~n-rs%LHr zVYsn84|;XU>*8F~-=#GBEI5d5!IaZ8DO+JdiRzd7Pf5;MOeeu*-P0(26Dm@z{@$x9 zP7!i^>DMHVV&v|BDFpdjG?&1;W8##WqO|RE|6BQZC21TwUub`8)z#OVplnt&d3;qH zLJ+0m;0NU~6@CYPjmJCI;ST2`SbjVn|@Ws;VZ!7xh~ zWw1DINeLV}Y<35Hg6d=bEHanJM12$(M=oBI1(~Vb6MIDU(kRW2LL+NmVs`ur%7`^` z)LJrPE_*^Px^oR5cUND^$qTgPw^Y5*^1hz5NZit~T6)+1s^5K`B^=yOu^IVb|E)Q=UPPCXDKMPyrin+1 zH=ZOBhdq&QH#PC5?59Z z8kppHl6d5+-RpIv*N`QRLedN84glR4g(DJFJI5)f7m}qgh7q6WnVt$AF!NcO!d&hh-VWESMDs3b zFTl(L9dy90kbxPe}*aN%o>bQ=VyiP4z)Qe??SHADfA(+ z`!2UBV*#yMAuE`-fJjJoc#PYJO}~ zk%>oaRHKk%ta7`l$R48XD$5=s#ex`fB*fEo@yk_L+SMzNaUutAjJ?6}jVWL}IUdV` zqAR^{WO|MWX}RJ2wLNXbFj5C{Ab%e;K@3CY$D)kY)?It3>7$R8q^w#hp=wf 
zLEr4;1(S8T$8Gi?SME${g2)fTlLVH~Q!Jpmv~`=jGX5m-5K*dYd4pni=&ixjb$ilZvAiWPVRnn%9uZ>qIOG5s#2$S;^_(D!kO(#QSoc91D6Qv2 zyyTJqbz&XHv!u-tGlDG0B)OI?@(M5S7DOPPWg3aqEq{!MN%Srups%_Hp;CWcu>4CV z>&JA`Z^`w}J6~2Wg^L3|skvYDJoqz_{QD({fN>5PQ1ItIcF@tK5PxSz2n>|6#m4iA zbAsIOXQEH2y*2!z`)l93=d@6lGrUuqk@KMSFga)ZVj)~2noVJ0ZrwTW)VBo&%%rwZ zMk8V*y&qI>a5mE{QO6PHiO8{T8BotoC+5$5r!>e&QHc7d!;~1sa^nHGc795Prn45| z<0Q|}J`9xw$iUrE_z~X=QE>j29>Cw$Id!aC9=hSuk9FjxeNXI~DI&m1^tlp{UUghx z?eiq;VB|YI?u`}zE)RARpSj2!`|3D`f*2}+EPPzh1N7iU@-yt4H0@=DC|s^-o5!fVL<^uiGw@_fnv?zQRUFhy?NCrhn} znh$jIU-atksHO#qcNJQI$Z%`r&j#uRKN%n1+k&WfNk-*m%*45`nNj9$&e56?G! zZS2`8W}~GSgOb0^lF}|yxU1ynXhHMpYuZ*jVjFE~D%a~{quNY#V8CW}jA$YE1l~82 znFM9A{)A%4J41UoX657OoXsb33NKpGl$>OqXB=}xWkwA$qC+bf95V|*S3X-v$KQ*) zDU_7OG8{j*Sd)5*|(e=d(C}DD5EcwC4;UCH5Cm43+HIUF1kuwRf3$%5N!W5En zhJbj|KIxtlvB)=G zE(<7I!8L+>x`VAM0t33Ks7TDw130%D95DIq)ku6#w=VB0Obzbf0d{}T;zrL~E<6@* zZ^zdMRh0sQjx;LOX#n0eh#@+vz(c+PNeQV&0C;Nu5X@P2?Z5b*!<+Pg5fbkMO4|&A zz)E0P1o4mZ{M2xHa)(cR?dAI)8V8vO_ZXApE-4@CbvTM1l7vBD$5&~0t`n_V?idmq zq}N`Xk_vg5>0G`Ff~9~*U|8X@+3QyIeY~S$`_+LuDRa1VKC^#1GZ_dCkOIdC*-C5-s{7L4A>+O*A#~%_T9JaP0 zdXdhAoY2n`up2S*z;~&TMw%&=IBc0r>02CwK)lH)UtCZ7jod@Xs~2(?v>FY6z(6sX zzBp(#Tbcd8g15y7wRkZ?Xa!VQcCS(Djt4PAW~h_wXc6f`+!K1H8h@0^X@9sHoNKv3 z8WLt;N_dZpiijA6DKo3J!KlH4ul;lc-=NV8qG>MuEDrjr_BifDn7v0LZh4>vD4pz_ zkHf!ipNv!2yp247(AGU|L$>gJv;%7bddtWjX#tmsNJOX{F~YsC7|;z2lOjURW{ro> z`re;3%2fQ}f)$DWv@-BJead@Uo{tdnB``#v$nce}vh<}7|+gIN( zf;k!*FWe{hFy*E7ld%!9yc`ZL@*xz3bu;^gw|dlbV;FGWiQ8g!Z&eP4n1_Y;_J#xh zQto`8JAYGw@rZW(nr7Se-0$Mhjae*(Wh;JUvN8DxtAN54yj4J&(^e;GR<)rS++FsH znd5#D!`*X{-13-OndRpnP&IrCeR#$jJ{vhI;O6sEL`zG{wCG6ta~5>H0fwaQRK_dWYPs;6hbYoO}GT05-0O zS642OzbIJRTV-G5>$F39^ROdt_V1_dz=flsydzLFaTIW4Xk)?wg49j3Jo6XWx z3SO8p8+bBT#mMq8rCyD;L?;xq@PWS4CMT;y{Ui7p6$L;} zZ*KzjY*PdNhfaM2Y1EdDj(;X$(-aY?xNfn`9%TF7@#6K8+lBWG2UR_SGW)$Y@*YXU|P8Wg2<6T{E z5_1^JpAD}u%6r7C*E*)(*nf-{YiaaLZ^q0wbEGiZ0`};$grhXmmq?GDWjNAQK#VfH zRSQ?HG8G0Lr#E_Q+aUzDQv3g3nwJW1MmhQBeLg8?o{ 
zS`1P;otIXR&7S290;MR&;??7UYm+y@c7Knuc)Wd1AHZvoBmESrNK_miJ}b}d1|o`F z9pby!v!%Q6!|QzZL^9sx+6KfRVA6#$wWO3Ye5JfQpaW`8FTjpAdWB*-T-BAxX#gJV zbbdG>3Ea#f?3HG1uU>r_axq(%6Qr~^E^ql>srDksr^ws`&y!QPNVT{)wRo;9mT?ysh&G7MCsic_ zm4jLog4M)qt9izUz2SWNXnW#pQ#p?_YvYBWVNDinFYS6#{(uRBn;Pw*)JZY{?{xyIZgm<+H^+emLB6 zF$Lj-utxl_J2lyeLvx^|^zk`HcTRyS=Z9lSoAMjjmfTI!-Yt=FB#d5hM3*}D^jshy z1?(16jOlK1M1oo(tT8>JM%MxBdrR9S){6oVo?%yj>>kCyMN85}z|Qu-Db(F*A<)(I z+ZM#U&6{92TE`F0#1wC>n)iJJ9AghAV7?!e;rKG{5o9qR1}kVqg#gbjo3Y13mlNR;?{URT)&Y|9ySU}IULSE;F9o)2|0L*OrQQ`($)u9N?Q68XEu^w46p1Y zWtl^X5w?|G%>gF%MENFLTjKCua>20C<<4d@EZ)YPh!3q0iSE9wL?0hTtLZ(8D$P@% z20KSn9uC)M*c%a{%6!!JW$$p-3g#5)IN*d4}f$c*WzQYLrFx zeMPGH-onB~|HVeg=$ys&wDPWh_Vt*k_`)wOsegSXwl*fAJ#}LAM!W9;e_OMg`^>Rp zcr)QpnyU=2=6(%>J4!Cahr87fM9o-Ak@Whdi6P8 zK?#pkE-n;9pcPm{F+XDZ(aQfT>2@s zLt-7dh^&!h+#&fPuLJ)R9(|3Xy?+S*V>7kA%J~985SgtgK+c-%qK$&M3y{p1VYo7# zjwrawFQyc!e*(>*yIGqW%^DzHnVZC@b;bk!Q5Ing_wsM-ndXbn{5wvO$Ch zrq&6ZuNs<&JSsX~w`CM9z<0+F6bAkBKT~ zwy;O(SIR(M`jAiwp5tS*PgAWnppf>JypQC@Lllympd9 z6jWMnf&ZCP7pAN`roe0Ex6tV0nIQO)GE@mDr5DFRAIV*XE7!5|QZ5ps(eny4q+_}P zGMPTkasnskD)|{uwXz{YhB3zP@@f zYGo*h$y>FlD1hhbnRbY83gHSKihmNqG3SUAg>dn>0$AR;5R0{*m2z#MvkB4`~0-x)uT+Pv3=lLQwE_CdfTi}-mvC}NL zokxQ57Wn3^3bYpZckjzA=?|kgQo5`sHDs)0!xS27H&d76?4X3 zUV^+NR!UHwX8uaDXRH7RgrXC>!;&_SeL9N>zp0luaFvN-7#xGcsjutZJi<}N1($ib zu>^$o{luQxHGe9*(&8)0%knLpxP}^ojI7}}Dm*vz3h#Z6Kk4@U6kp4!|2ClC4bat` zwt4!jBKca|t^9a{ARA9&*A??qUp-kJps;Uqi*(J=i(q^?KXEy50u|9>}#l__F;pzA^0malMBamgMtt!vKY&rMqD zh6!X~sCSIAy!$MY-Dw=J6Uk4qN)9;H2_IQO?6^YC01Z-px+no@5lE<4Zp> zam7zb5Nm=otv;`6i3NHCv;F>~t1yvqwu#}vN!FGsTJq|M=SV)u zg`GMDRCWC)P<&jUfrOiq2@xoLwmrTJ%Jf6ZYgnMB@U@U)qFUp-G`w`8>PFu-i7o{o zv}bc_0fzg53D5wNuq12SSIbBjsP&fG)-%+(3qKKI zRs%S{zuWRPlXN8j9;PRHz5%j)6dLMl@FUmMY-j@CQbnsHT9@{dZz&qj%4bfHXS&V> z^24RDIIEevC#>|Mz~V|ubZwFCF(e$S08rrmNluM6QR@ZHr z*)Nw1LSU{~)&0ugq^addbPkz?D#_k_q9fKUS&S}!eHnw?_3aSO!Dk=G80IIn#qd%q%cu#rjraX_y<=^PxCLh zvdE=Eyh2=F$$Qo*7(JUeA)_UID^>KCYi`5^R;B06z?%wrBsQ@tGs=E#3?ie_WKO8RB{Siq!<~S$ 
zrq??N8WlT0uAGWmSQcc^6dS3Z$S#KR_tBHQ=iYq6jq&^f+5(lQQF2_%= zGAf#DA7esaR%9j$mh78v`{J0i$;2)BZIZwR!7*%^3_q#b%*CsGot?bk&vKDhj{V{I z@@9Sz@~d&n*G|NhpqyxR;wxD8?Lt7pFo|m@xex;o{I(#XwRVII>f{NS)A%VspFTZD zM8#aSAkZ(GUuQ`9?Qc zu7z?9Y_9)RkGz9nn%Id0c-iMW88c(mvz-!RyE#V(FsO>XaDWkMSOcvd8LmdYOR1kb zmh)HJ^S#XTLkL}oRDNYpiFbdy=MAWL#i&r!Xy_0-?m(RufwhJ-Lv-AjYb^mMN=iy7zM z4uPHR`(tdc7zq19wukj)a_7nd%(TP8>^lexUvM?x@4@{tf$*l z`$dS#a=J|qdAsR$4qU70)KdfwNEZ+B2(TXnCXESIy>jq zD1>Ib(O9!(W!c=TYfk}2Rzn&-z=Z?n0PeJp3tz%y12Qh9+Ay~bc&fo_zuX!jR0ueE z5MMyiG;3Fy8+H2WHW5hDOt<;gN-5nYhAtQe@mdQtEX=OKTNOA2yp8IGkXysp{kUCe zj}$jE!RV6d0`FBRL~c_L z!ryHb#r*Eg1J-vh8z0}jr)CVfRRIO|Ztn#kPbpa4VSNt%DvFZdwB54j=_~8wuH@Lc7*VRTLp)igLS= zn74Ai}6mGMaXu@qyO9{7E?1^yen;9Qa>*aO_ zNXtcqC9~VR%mUaz;ocBSHMrbvA6Q`RUSSy2?sa4Z?cNzl&Ti$4Q+69l7!bFDi2HDF z8y64v3I>whE^<0R)k}h#dzA%50Csz>Q-ojl`qf47b#E*t5qjO)3mJjet!?xo?7Dr6 zE`qLG?Iehh>o%bQ5pdl)BSC~)w-WOd!OQIgl0@ioFDDEUxZJy8O9Mx@|A1)F=r-w8 zHDGivE+P#Q-8(}~14OrCLThm7R$;>$7`h#(N`peTi8(m{>sA_T1axcKB;%iZVMsFi zxmCuGVw~Fs&=uR{>Ab_d99s^R&#HyDy0w~K>#+)83_$E^|e=(vr6Q;vIm z48w7+t57%Y#TCj1khxm{Tgcs(aUt$D$@VPJ#Y#vmHFSG{=)qGchlErHR4o$b;BME4 zTSerF0H+6buOAh!bZ?0f4|KbY0snK`*R2O|Zz~|zbFUC$k8l9)Ewt!zZnuT`o7;ip zoXxFSd_B#rW_CAo+eoN`xy=r2uH{}jn2NUUg@xgc2^j}POSh5iIh0fcbSsxu#XtAP zdQhZuFE2esHn*8TP4UdFlynuz+`FoP;+I>~@F;4z%?x{rRqkaW!6bIu5e5_3t+bq& zyl!4rZWR1Lc{0?p=z=^uq)f^USRY49zdM9mO%T+^dR10&3RR?OQ{T zPyu8H!WY!7N+c{;WM^t6-KIt16q!YID;6Pqd<~@Z5w>gY1)^uw+%A>ohL1IKFDVZk z;H-J8GwjGU8S=3!jnk4974#fRQ3^D6#U%$00s?4J9+d@qGCgn;qL}Fc%`W)!q)tzV z=0M{WYz=1Y36ungSA@dKfg&1xZMTS7u(hO(X@g%DqJbv5*L0ZHL`%;Aw;18pQ{YM}pXP*S$<) zEw0NckqZT0aTPnA?z;78T!vVG#f;pmWqA^N{R=Pci zjiVn{6iJ2Qfph`z_VW6pz~`b3j7u4^w{;=d0Pa;vpd^46o709D#DOM3JtDXmBCxu`V9`=wq^QumlOsgT!nysY z)xNp4N1}Ce@13Rl1iH=?WrSz&%BHz>#o?m4)eg*_xm7}mHFN9Xhb?ojghGX~z&crA zDT&$uXqkGtgMQ@vT2m6Ng-rLa_#Fr{dY388|aY^XB=tJV&|fkXAr zy@-3a*zATsOAyVqI%ki}y|W~Y{>>Q>wq_c9Mb4Q^BQmUp27y-eQJ+UG@X$7k#EB)! 
z=2lWmdMHsf6o{+|q-So`#mfam(FBsX$5{~(Gu&vHTMLyeo*L?FL6r$x2ZsTUjzk8C zlc&b^fwGVu4PZh&%&??Lu;ezN4TbmGKofZH022J}ZE|DCy+45L-K%3w$gLI>&aqEOQ9JkuV-MAHyY2#iNc52)<5{Jg^KsPh)H6nnF z+dn?VxP6NZ#_bNFUfedIkc(SW?P+mqT@Dtv?I5kVy#hmuTZ<|6ZtXB3@76+eC~lon z=ESX(GMTvTC`pN1FFmrm{bLv1y?yx1-Hw8kyUh`^&Jee{zy)`&pfGQ@86^&orJ}CTg(C*Zi_mg;WkG+!@Yt?Fx>7#b+)Ek~E)PS%1yTMubZZji$Dx}F8#n%vOq5IIV2dO2iCdF1?Z@GA z5RuSt@9wPa%&ac%3~uo4FL{XQhP*pFGg~GPk0g({og~h*w6wIeKDZDk-nl#^^fUAu zRGx`~eSAqj<&to)Y^gNjM<~drQVaUnv$!Mz=kO^{FO29xKZ@;sA)OEMZ z;NF}#IRTYR!oky!P*4zY@FZNGf68SN?+oIG6cj`pJR2e#yeaLFJh=Bm_+MohMq`@x(g~|5zN4$UDP7 z7J!I-Xy@^WJWESco+D1>dFOWGol3LQkPvVUP%sHdcn%UO2bE_saC!Fe84@{{h<&Hp zA>X;=JC{i!N>0MVQ|-`*AEDrR{s|HW{+-Dk=P?<$JO`EM;h$hq33x6C`I00BlZHw7 z4o|g1K|#JVkO(57Vc_x{gM2Z$Y>!K7b5vbNOr2ht(|ImEoQ{wkkR5i&m&TejO` zU-A&~T$T;;`JG86YbP5U+maCQOcFT>69tn?zCpyLAtB%(;+foWviU7Zo~;cM4w@PA z6&@7$0M@I5qV~I z0xFM4mCPPDe;-IAprAli8zk;mf^d53l$kxh`uBZx~rJtXq(?C#9$ zO@|@jlFv{1*VfjS{39GZPqY)MwssyNGz45OK|7DgyW^pt;F2#{n1t{=G)xwrhJ=8~ zJHMoj^9l0qcswH8(9R>0jn5EpPcK*BS5h{)%c^cy5R2bG0qqTnFmDJZB^ zat8YGCC|S>zVk3~knhMzNVq%``-luGlYK`B4FUO*Bn=DC8+tNQ@Hi3UJmfpol0*#o z1kXgl{5#Jh+KKdILpzaZCvq(Wof}-7TO8aRTV4woD00v^ zg8}2jiDWB0372QG?<8EF$-`ve@>~w`DLI#AgM@nL+S=i%Vg`;BH_jW#v%|X-Do)%u z&%6^^R(7hDrHFAt1`8Y~Xq25dP9V?fP9jP8_67%)g@bBw%R|JyLmW9~oacEW%@Pgy zjvPTejEf5a7a|TW#GITQ9o^epT!_1~xaFYoJdemzO^}eV?>tnVYIVy&LMqI6AsFwzsu4H@3I6H75o8_>zXmGDGEGpHk36*@-8J z_(ymIiFYmq5%~aya**)I z9+ARx29ER0>@Z1$uW;{_G2={uBS&5e9w|;Fa;Zc_0_0>%3Mv85LuKId{Np1OJPDU4 zSy>3Sx3Y5vj`KXp&g5Z^lQ7UP2?TlCNPz>!iIY#6z$KsIsV0czu<#sI zo`2_|@;m{R=b(~LaBU5VY(qQyl!j<+A>QEV-sIxq=G@%e+ys#vvV~xKTU&!;ds|zh zdz*s`AqO`{C+7wi!mTZ7$QO9%B23B}sHvow1dxvwYsDJfAqikTL*Z&|nA~1`TM8VZcD$&J)W823?9M zrWB29tWb#wVzvXfXc5J1|Jbq{6uyVd^Tk-v4HquLV#RvdK2h{w$r78#Y{2$~Bf#6co#$tB@npyBe6&3$~!BZ-G>wj3Lr598$6;7s@pA_6)u29CWw+t6ddBE_Bw z5C8V|>^;A+!LbPz9#QV??K$zt5yZc}u(iAj|gi<1aqPkVgn2Nep)QMFqfJ>BH7l`4l z1R(O$cdMgVF>Y@#An#tfMR6~KpSTqONG9$b)K%g(6d)w-l~TmJ-9^@jd!=BwyH}Y* z#H~8sz}@ZxWL6@nC@y)7@KmLGJd2 
zBN2*zw>lcacbf`+?cJ+|i;!Ct2%6yIURIBG+=~H_j@uCs-ADu!;^3d10&$xbaD2E8ii(HZ zsw{N4mCylXx8cfgxV6`#^IkD$8*Y8uqv2k)GZ}6RtAgQP4@EBA>fnln+b0NBxIGI2 zg?ll<2+xHS|BRqMCqlErU|yUj*p!KN2{j%u5l zm~5u*I7GRZfgqIIeDm_8%mUNOZHDM#R_S!!+vbCY+g+F>YIEKSX~71gJ!SyIt%>kl zFcL8Ac<%uu#(NtzN@eDGZndDSJ{^Ett=xUit$+nQ&TS@mL zP2J1HN0HRMZiEyMxOWh*qNrPU%_#;jRq$m#Y?wo ziYivR^^lO_qCo0$@Q{Omwe;PQ^p_o@d2Ew_%Ve4!V^U zUNO+EUJ5Awxs?W}E3I10v2-lrxz`YbqMh5sE9)f)(Mn+Pk+7nf+X(&?$J|?H)ty^K zFSmZ$;+9)&(J5xR%`~>+m3x!a^4n3Aa&H+fn&z0oZu3RKaK-d>s|h0}uX|H0GIia$ zfYG#dZx=h0)@`H>Q`T)DPE1($LI`ChaC`4GTiu&j%v^P^0~KbfTRYG&Pu&^^fLZF^ z3VkL5_b$<5j=J@P5i``SfE7$ox2GIn5^$@BVrHj%)A%ws-KK*%Gt=!ec+5+;d8dk% z#u{cqx)-sGIlyf1mNCJu*^C4CNSv+%{RCD88X}4rURq7=H5Yf%ry5d zx@4ZYO$U^OPv)3g2~jb_+&iei{Bo-r3}% z+$IdM!tR}-w!rR1qzeFv?cDI{pd)Q>-CopV#j2fkYnfiWAndDK$1D&4sa6N>Rv}2FnQ^+h>c323KKf5r(408EvI`2$*iVZ*=Q}MTOpi^D|SWC0J+R&MrACc z+q^SG;=%%7MiMQddvWRcptl0tuGME4Bp`n_4t-fDCFqG4fO~ZuuAJLe0)gtXZf*x) zXV=`Spv67oI2rx0^RmKrU|o`IUFuj`758K?an#x0nI%R$OoHZnZ_p z?p|dsx_cG$k-Hs(M2OpDLFw&wouRhdJ`&e%pXp1xO%{A-_hLDg-J3?{!);UvI^3#5 z0f*a$MK;_{&?&>MN|IctP|U-JwRG!^AX{nOY@21XYg82#cQQiiiV_e=i1PLuI)S%e z^a8xsEK2hB@k4HlFtK-Q7YunMZWr(i@76CtMBIKw!|qmKE$WDJ@X|qkC@CrdEN0F1897;=#rpRY(#T(5Ls{P1%|*uZnhceAwm4 zdvbfM7n6GvX(is~Q(JNy7b3*lvTPaN3Ia*ttvL)Byf@6KBllhyROH?xheU2IrssF7 ztaHe%LAn6B_YNM9TW4j;aqH2Rja%V1&A6S60OQs`qe#$Vahqy2kf6BN%1hi@%gKm) z&xo#l?B|6;u!Nhqx2qz2owlbS=+ejBn zOAc=rDk{8n3nRgM1+yq`-6AXSRsc*9xfOs}f42fz;^Ve?Nylv>P-5dY+YDvgN(6*4x1F?Rtaq{?iJ;##k~j+T5<3BE5&WHAd`2S znBR$8VJu7BW(706+XmdZyH&^lcDLP7p}V(%pWLl~2GH(3M91vjMt?HgDr|rU2h2Eh z8SYgi2*d3&TrS*N!a?Di8n@XYj>fGBOw73TgWHQ+l}TLOt8XoCB~(Pky}_*H-8JbW(Ct83nx{l25ckSzL3Xc<3+KJS zCTO^q6`l-%n+T11rI_wLjP89jc4&;Mp?mkFfwbxpry;logH+1F z1A~?+Yp9MWaMgm8NXOhRu?kY&-wnlbdwHCrCdIGf6e#yDsO9ALUm7O2*VrB2J7yS? 
z+jL_iyjP8oBlo_Hk=q1;M0xL!2SjeqxP!;<)-9m;Zk3Y7$Gt`_@wnY2gpS*V@Zh-B zifQB4E^##O4TXS=+k6lR<6d41xVTrB-icdR1T%4KCKDw{+^$jz?>67qh}%Q%+})G=kBEd|pE)&rTPns+UXbo?5ggoeJbVO`3 z^XEz;l8jcl-3D#}`B(&IpY(9J)eIpGw-((l+-3?26mIqOB*N`W3+N$$y!C^tF}p+( zw`Y~Y`<)28d(S$|-R^Y(?lvc+z1?0B$L)3`OKrDFr-$t}^Lnk_8$b)S+gHrAd#!Ln zyG_M(b}JBHX18f#LD_A#p#gDg0f0W-s_KyJ)(Jj1+-e3%!)+LgGTgr5z<@`0CvKx? zW#Tr~4DVhs5Jubv)Nyw^mjw~Gy|l8s)z44e?X4hpn+UMD+r$n-+`8#-w=vPZ-8%&7 zcJGrx9PaHyNyBYad@|fCrVqodX57pfwT$j!u2x0x8y zQeA`<;oSPbNRiF02skL7xy=rtiec`>BFF@GTNIhHx=jgYc()Q7N8FldANpos=D<%nDiTz`>zdLVlRWo-#R|%M6iJ13c)vH_dtX zUJAo@uPRDvw=vA2-5SJCnzF!KSs<_gMuXcah_ID#!0FaT2Ly`IsDvnot_Wf{48v0l zg8&cyP78w+Krk>I77B!eqVY0ik{%R*9>PpsVv`0*41+O9A%mDh1_+3V00<0# zL4Y*?<*J%CzRk_$S?u?OCIWev8yq5l;Ya8X5VjB+2LLE#5LClI&`u0Uxbn6Tewx@? z%UFLF4_p+Lj=!h@!Lj)Empf-TqW#>AR_HHjO%j72t@CxtoGJ>@jO}JtXL+lT3UP_E zTNNZA>(w1ZjK%LwObir0#@~t6Qs1rrd!|dc2n7Gg4Us7WUUhw>A5(4oYwkZF_xXow zy%C03-P@eat-grcpjnIz>OR7af@{pH{ravc$s(RQt9c2?2UMJBClE=&;Lu({=ur3u z>1wKbkZL0)o@r+7Pr-9CEj2HAZm*;CX4vtoLL-vItGub@B>=yN+2X)U2O|4y9jDX> z>&IMRm~{#YlMbsnw3V79KK;tMJB?IHr3dr1`7cQ}v+=p5S!9T>w5!q;+LLD5{y5j_9mz^`!_}>C-bIg*BgQxOcbE<+VJ6Ra)yaK^5 zCu;9M>QOsMVW(;r!Wp7^fn)7Z#8$+W@TY+X1p=RkqFt;vCt$ z7=p2{nU63f{ZVZbzrlF4_~jwZE7MvgR+-4o1?UA>nGT!-8FNj4^kcrVW85{u271;J zz)42Rf)3GTim1yVxK;dZwlg$}YSEDK$-WbVpfTr?W| zrBSlWVvi$7O{)NBE_?nT7m-+qJc4`_+#jPq!5p+5ZAQ@4*&#lwzLP1Xn!o0?Y}iS$ z77g0-;UU!F6*yIO+@mlL$^~@aHtw_299kT@bAdb|FlDP}zLT2V^#c$%EooUr>7-DJ zUj2)vnPYpPTLnc?kZ^wx)Db!kuVlM0RZlDNrS}wP*O`+>yf6`pjm)WT=R4^i4SUU} z!0~^uvbh4{giYb=eH=KwGn|8TgJW(9Fz}u-f%3a7G@Ue5}x$c|PfMcU7BC0nLKG6!Iwq@lpmw zt}k{@nlPF>ylme+f{%%ER1rYc;L3!6;dyrO&JjA%gDL-1h!a+tir7_T4AD!2u!pFt z>=?Rzw_h|aj}~LUKwbu#x17VehVs~yG5mmI1bc4-9DccI7YAW5?XBgcrqo!dBo4aG zaqmd0dJw|~A%sumMN9UhK8rZU3_4nJvXHn5Hdp9s9y7zNPtq`C+ zRBk!qS=DpS^O~lp_~P|&Q^(OeXkA@0R#~0Tf~^_mPx1%*LPrH@cW09jm0zXOM{TyX z!d`>|t{kW>W38!PUs)M}icQ-xb*v8kPHk+ryX>9NToHwah+pb}IBP(EgoI6g`y}Cx zVGWW4PU4~>Vmx)fLLueH9?2~2+mO8Q9vw)@8ey=536zogdp0TVU@)^v6+eY1)jj|d 
zZ($RFu3W{CY>RXP@Hg!H%1djNa6BU-Ff0y7Z^O?T+G?hg;I;g!(*V^NMvJ9B?uwoZjnn|R0E8Cf(7;wpwaB;kNRuyyxb>Dh85!t9fqVz3qGG1 z-=b4+NDKJUl)hUtHHuUgnAsL5^<8;R*8Qfnc(XV+M~()PB^*>uL-$UGfaGjr!Js0g zWk5ZjQA*eaMbH<5CxN@d|LvL=))gX^hoQ$1JYG2L1~x3w8QuY@9mm(&Tb)9rqhSri z!w{?f{oVTNc}twcfn>>g3j2fjGX#-n-@3?xQUe_Y@gpv+>@r1a#Do{}%Jyv8~5ZI>TX3(|&PVv!Trp+An9d-1I%THG&g2S0Kam!VMLRNTTW`m*N~I zUdpn*apVP2Q2p1{*t{2aL9;AKsDL`LKJo-2E5Ci8E?1dQM|z2 zox)f^25Lv!3)8Kq#uV6)sF}oU^QF--6&0LSJ!oc9Q%ba{jPZJsevmj}j95v9k4PAf ze^50?(heU)x=Q@a_Ab3Ay%2ymWpN>>L~sP|kOB2xr7epTwWFqxX3q!nUf|1=BGQ+g zd(Acovr@lC``yt^PJ*L{%z(u-ekI$*H9nQaJV5%Z3NfcWbK0nTnR`x9-hFPhKTY>a zx1KUXnY&YyWw0EQU=}&_Z&nyUof^dn^PtKbd9h6n8hw!BVDaQ~6$Z~NRE~xlAnv|t zyez!rW{#-x|BVS*5KyxM%MvAUlq5pyPpe`|f7TDBbpxM}FXcqMS*=m!w3nMS$bbku zN}~v@fI?FqHKCOxsx|{)A!2oOKhQYgse(I#3SXX|S9u3qHoW%aZ(by9pj&kFGIs_v zc7l)%82KvJI975P+$7;iL3BwS?U94Krsi z&RQw+6W}QpOt{E)=YG5+p}~e+E){*so$$!Pj{1FtJwdUg3RKu?x8Xqv&$K(xJdO4Nv%^7oZIf-|=r5RAd&MQN)~ zSg{=e!vh3*v1CZWsHEI%53u_Xt2+4h?;Z))yG=QUj_9Y!Rp_`*o4PtGNXNd%{_JEG z)butgs}Q=)FvDnoif%+_PZ)7{!WJO&h{8hw#%%{EVgMw)<2NV=F!mXM?62@3N?0&Pv;jseHY(%+N7^b+J(Rw)IGD^`&zuY> z`I831M#oz9?ikSbQA`Rkv8mRquA&I1t)|U$h{R@GKP5||{?@~h=x(UDh zzhN%oYRAc~jc3(CWvf4Mc;c^i>ftGB5(y~Ub9|&(5%N|R6Qh+CY`H%ahRvSO?C|$W* zS`Ril8SclG}x=ck8 zHA7~9hCdjTFgTqO48Vq_3M>p%G$-E`7g2D?a`m8nf#v{LyETEsh~;-4*{A?N-r>v6 zVz{>gjF^xw#iu8GoWeosrxm*KL^L0U*PVcrs)L}mt{Y#Hk zKd)++58+mvf2B6(#CDR2KXcrW&g`{fdVf~kwXg-l$D!?{3p19_mS$??Xm~iTZM|*- z`l)i`swYQ8W5Pj|94JlH=$$pwB0~r0;mqG_-BV?go_*gKG#dq*u%*F(-=WVybk_dD zC}+^yt%5F$q(pgt;GA??bs5|8P` z>7?Ocr1Y=`>PF)5O)>!c`2pnz!r=J$Tu*57xJgiB3DvzX1vOcf6QFUB~byVm50{35C2xDaud znzY4G5$%z78+irqlWSO0zr(^v{2}WJYX`OdSK4=_DR0S8H1W6$c>i1z=ci~KhsKyj8-{F6o@FwOgr&$ZgF$?P!tqB1<(eg7S?i^ ziX;tL%76J=Tyr-nJ{VHc_3N>zI3=>m^wumR_U$BLk$0tHWyFaiiX`jb@;15vPmZ&> zyt;i1ZSnkM7&`lHe|z60upMD}VGqlN)XE!7Vm*@-rvOJlzwh!$R2tzZwGn5u@BR|? 
zGHQV=0r1fT;bF&-J)85|Z}G;x)m~JEtSGEQk~V9!y6u`~?}qAI4XG!pt)P1HGM6(n zLWq85-CTcXV54At;nk|MdYMWLKJ&%nqI=oigBb0#YgsPfA`}OsWwi}aqkqQV(-Gjf zd*^05`8zuxlZM(BL6lZWc1fyBD6_9B#RLj#o=F+#7t_$0XX1phg3Lrk1w5#~!&emi zheQGPRu9vPr+TPBn}8qN>whbslBE;-p~~L6E+2jOfi7yA?Q?kr_~(E!T`}zqO5Fq` z<97xAx^Ghl`+EDoq(0X%ugzyQh@o2_M=T#JOf8p#QL)$TaUp5cE^GJ$4 zN_nIQDK8nO^=A38?nZ%0RoH`a==!}n(IGX|p+O?S4A+vEuP)*W!@Ctn&{O|t(2a`( z>%Q#qM}3{xL6rTsk{kd0=SH)k`)xUc3KGD)9K-C1ODi@VIvSg30(XB^y^Ag;EJ6TH z&Ek>>Vf}n}1LF2k*TB6q-2GqEgvCA%J(>3u=gHrnXc3{5?&W0)VZdB3xlop@b!hsQ5m7d z6hvuW?^+@Y=Am8pn(DmFNEi^EWjU3-D?{IjdEZ z*l9rFzs!G4V)$WSbf7TsiGQj?vl_>4lqr=hN)h{Qu=&e@$tKG?Va|qXJQ^+$~wq^C(|Y^Za$m^Mxy=<@4mhxSp#tQv1BH9>^ifxm?VICA(B+IAJhW zxfuVw_S)WI0oRZPN)}cZ?kefH*I?jrK?4d5)ox-D(>>4IE&8FL{sSck8(Qv1O^~kG_AM|sW z^ezUsl}*anS34Fm{MS(lQ*fZ*w=DbQ&hP+Fl&=`H4rk&$?RggShF;xSMsl{=KG+py z9@PT(Hh}u1e>zO)Oc*iQf~__42C=Ba`OJip@T?G7Q4W1oG29c*vtdl6q?LrwFwtL2 z;UQAIc^Uh-z)P|Py;?iPZ_OebQmdc1F*5NWJygp#A>1dxh4cPV;)vvROQEp@_2B#o ziw@i7@EQ`5`=A#LD9zc)VvmZ;rGBj?zJtmE;dw0d5BzKd)G*T5fX30GT58R?1BVb0 z%K`q(0ea<>2jl{L19W5Io7*^40HrtfL?Ku+bkU5^&KMs_3Js9gI8ZhQC7)BxoJ3nN zJknFlAaR8}fvh8;tg%+BnwuS8Er;y)K!Y9m(8jQ+(y76P<@1(eCK~59w35;Vnadaw zB7cQ9$wkLPfWqs^jAs^g9c`zjvnGtMSo^hu!Igi7T~rfzG#csC`hOlO-sfE4oYC+R z?#SR+KFXZ{og-|W4Mug7-R6MI9^>!bKmSRtZkb!qF*-&~EEaA!1=9|R5C*Uwu^S_= zsTaa}A66#_JWxJ9zma;R+IWp|jyy`U-#r;bHigCVn3Z zb_0Q3v3N5B6U+SBYoudgnvAyLzCYZtN|im9e4?GFu4zh=hlo^RLgLugFEOWwqAfPb zUy6mJD;Q$?J`Yd|DI3FGnlN2F-3iU+5nTaODD3qAf6Y9^j-t#D@Q!k3B?GH5V~L+q zu-7X0#kg3oqgUGzQ^Pmn4{X5NyQ)UuJsqyetnOn%Rh^2C%xunRgu$;E!_nZf8A4%J zdNfhw=5>GpT<}#Tc%F65b+)j6yludTzn^IjT@!ljsUtr`A0S6ckA1717=G}}AJIi0 zXAe!DA>w7~n231LqujL6ubn}1Xb}#TZYWNGldRaLi^st{e7_N+yAL%?>`9pWCQZyd zaAJ*%!oR(+Xgd#nFUNJg0G{}f)PRgafTM`ethLtV$$0OP(=Qqc8zd1$(|9N+!oi*P zKp*7$oLVfq3P5XWa0&<`+3ItScphE(#bp%TLsK4@Igri(uFfBtF_X$6y>ko(e6IOS zJ4I^7kDuBxK7(fTQ8R`h^bJj|Ux$}7<}+0m6JUlev=jaN`AuaWy)0*NeLbiYD?%mC zJ*HM1S@HRB`U4mN6)#*Qx<+GxrmpTK%t#+m7uHOxW|do`1h z;@HaL8(#(ePk7`&{n^0b0q8D1EzoQGOmJs&FfvW(O_J*M^I29-_p(gdUm&b04E=-@ 
z6NItoPDIz1h2X|SgSja2KV~C7^%((vruf%5Jt|R{kaLsIM6`iJd^%wgUaU<*d8BSd zDpj39aJrtI0KrFnY3lGhEE}Sm8BmQKdMTxLb#smdc9Y@1H;d$XYag(9Xd5z6G4T|> z=1+H0|D?aV=i5%n6Y!y99y212rNgOw3=ZQWG7E3#BHV$e*T1>KcrDq9b&f__Hn#H*KAVP|5xsaGEhNS=twaf zt@4YYS9%u$nlU2a&rTe21|EbS3}Sc=AOVHRdXhOU3uK#xsOHb1&tbG7LvZn|;qu@? zP3iAYiXw%oFrRn*q8aZ(Dt;lpo7+fkuZ-Ch6d9(><~Yz$K;u?}d1DFfTRTZDYoI=0L9_q)AWkR)F0YbW@IijT?Qcx?%s>SvXj}#ZgWlXp&X~LBN8D1Q*dnNCigBUF$@v9=2 zxVKAuXviWzfVrU>6MD$Ff#$_3I(VSASGrNON}J=?n?GP%2_m9g%qlsS2Vr2QM!mpAg}cewr_V zXiF*(_(^}nIJKrS5FIeC0oLkQE5%a4wOudK0juZ0pG%S54)R3`0iEHR5Gq zeegPJ$dtF-5wk?E`6svzNFN(Cqjs2@83O4(*923-{10M-XwfFB+~qbHNig-ILVX<- ziRW9DSz2zjz=$hErW@o-E0Jg6v9Td|3}wzjwJ48%_JeA%##{J=tGo~@PLP|qS)Z~( zAm1AYoymR&)e#0=bmngGRLPs*`dYQ79&H`THZV%H+^ z%u`9AS(HMv0YKmcwEVZs|8{24T;*BN83O*;R?1~|A(}-=wA3p1#!%ZdG55@n92sxy zHFZ6GfBbdI{d?Z>=V-ae*w&PLoJ6WcAxvgvrh4a@12MO60OtrdB!H5Pl$*S3%mI)4 zp)*7Spt%-EuA&J(gBpYNz#Ygv1&=>TTLD1)leK>1JT9|+heanT(+vih)a)X{_yM^e(BvL* zveam1DxUHL=m!n4@;GQ3kbMyV%^(GEv&SAY8-sCoHub3&A0q|jX&2>CEe;8ppLUdq zX=1O*mdz0w;3?>qK9MIU3RhW6IX;})lxmS1GYrrG$Dp);ahdTu_3?7$_DHuOdhXs` z4non`*5qRrlGvc+aF0yx@@J?lMNRN)+mu&Co{VS{gckS%GN+$@Q5>a5N90M15uMiT z8s5Oujg?L6_9Eq0+kF%mm3fO)`AFt@8j0pE-w37}pHJ^Lzz2mx#R+*qFQE_uQpDwU zF$D#eDzYyO1cwr8GJ-;&5=t7uP~TW_#L@O9jwIr6o8v7mn4;cr6ObGe?otjI7>K}e z2lNi8334D%+r?1g5;0WG9b#=;xeuplV1v*KP&aTxC!5KF5{3c|V!~rB0Yv2WpFx0m zW~_;kabaf{;Et1_Nr-@E1j|xPz(g`MH4kpuz{3a!$O-!32oNGeTS2%aku8cif1<-_ z0{~$IYP!RK;DI)JS-@_z3Y7-HcOaOV0EIucV68s7cH7#O zpo*lva-K^B8G;k@Y5R{qAUj%xoh z;6Oh+sUk=MN|=VQ-L@vt0krk2eWpZMZrkBef<&Q3F`NjqZ59$y6B6625&#z+ayFW@ zgcj-E9{qH?3rr*u6;4i=)xQx>4dR7v0T{Tbk%B~2bU;iqJ`o;Tl473FR`4UiB|;^M zyo3U|xd5ahwEcnttRYlb3jm9Vdtx96>(#0B5t7?)DMXKeXu1L%tNsWQB813xUkn}< zzHLnjQ3)~fF%BUYF_LlssMD{;#|)x^s^Jq|1D5acxd@T>+xX3cHwL)h02qpX9J9D% z<|+^wv;&tGWQNpBVkp5!eu)K?12A+%AY$n21w100`W!Ie2!JNa)Oe1hjRJDvBCi~P zRRA=OCK;Eot-m8-BB1^XL^uyX)tKN3kzne8BD>1O{9Hn)WdQDVY&Q*+i5|Lj)cqQ*tNMt=i9(ceZ4|oa)Crq|D;yv`=0BPpT4QT-&8gd50dkM)v z;FP@eR!Zese&ENSr-CbL<9v(lrji8Jb!Si_{7{u30;-wdELxAl|M2Okj*IcRLDHCM 
zG90=`!?2SMZ015*DWYVCsxN$P6 zFsx*N@5pd47Ylry9VCM;!~tf**zsu*4rr-g08t;dMX; zr3au0E%X@i6fUnge$a(W0m!SbLjb9$;G8I&+}QAOR<6Ydd>W8AoPi0)sgo84tV2x! z@MOC(aPaxM-jx7MJJq8I>Gn9HSW4!5iR1wfP~S^@DjIYdCl@DL6^uhP+Qwc0__fn# zPeln=B*Rf2j7Sm$Nz^|F0Zb+z$Z-LkNKRO^30j4yB*&Yn3M7D$q$V(#Q1c)LZd~yY zl*~c$qE(ma6g5;(F~0y+BM*SbfGkuT6;L){E}8(OY>QinE0l0g_Yx3{Rt^B{+HN)C zfcM|xD{--*tGE!UP~%$s4d`i%AV?nNor*KO%R(vy(n0Eh@Jry7#H3t2sP1iC+1POF zSO8z_F+dUrh`6650cju`%(%Evw7i7ZF~pcTac2C&CKQjkzRvk7p1 zOwI=UgxdYk<*P)CsHAXGwvxM~ZUJLilBc%rA93m7%d(7Xcmn9h zP>m7DYd}0l6fqfwK@kkYh&mmS7a;%92%%Zr_H>iFN%J$hBZ~@YMu*NNS? z-_C6j!)>WCyQ1hWLVaAY?VNrUvYU5%_&I)(3RQ>+Y`L74lAe#R-jKE&nPbT($XowJ zn0O-bm{hU^Kb0CY{msB371@0pqYF&(=5GazY#cDI5%QsJsXiiAM2Wb0(cE&cQ2J7Q zlDh8`*+NmI^+`}+qo5Yn}R+wP*hg(F{qOa}8p3lQ*IELVSoC?w#Ni0-YGRef9r^rNnBN-+fkC9F!v zK7mEYT=FEbh>9|aqU#+`PRa*1*a0j+L2=XQdC7p*GGh-oMq^_&YACIHb8vQ+m0`R7 zdr^^?HTuuktPz7STy1T2B06wcE+nM6Y6tz|ht>xB`pJLTFyB;`w-Ico z@pN9N3ak75skd&~?@&J86P>Iab)0sxTlGoOEZAJGwA>>*J$*mfY~~r?ho@SWRGf~k zLe9nI)zXflFud&eM;V=Hmhf9&PYKqBlbN2bDV3$smg!OPj?hThcE&pE&$z7*_6)^e z_!noJ=54x|iedKolFvDK8Xm#lG)bvWDf&F&TUuIrke|-$@l1=tuRk)P|GGyvH;RIH?&>yd z6>n?6@|twc?;4}*{SMq6SO1ZIsiDd@8QhSgF#1)}w6Z#Yo0UT^&TFzjGe~1#dr?H@ zn4E9G?(>UlUz_~7BDF+FNJ#X1NB2q!rv6>Nv82$Zmgfnq4m-tI1=A5tT-C~!iJN$Z z4-5%G0|@bYpv$dhI9}iGp6ML#>(Prhl;ZD=5i|Ifl*%ReV{ts|FC{l?odoH0VzEe; zyKKAYO!;Th4HOLrOX7)urLkOS0tWeWp@J9ObS1$fl{h6LnrUI=gm+CX&V)>2MwY5L z)gG`$ayw)Wl7(b`5SPA=?o+(cFew%41d7jq7Ael5uPgF*AEQujTv?qMA~UjRwPlPV z?GUAyj~*tFh^F%X$@O$6+et?>h3PY6NkajTS>cz2CkE-ilW|v~l9$-}MEj&j>`7UB z&N;Pvjs~%>X02ON+SU@7z-o!Cna1L?@Jo-SLo0>$n=P!ZHpNh22`vm!&=dAsE_7+( zm~mw`SKDTl>GI>lSE5KLP5lCFOZIh===t>x#C5c@w9-}CqT&>Acu7TlkCOdefTaGw z;J7@>o@fsQ0l^5VAYql|0tt}5LSYA8%od9l^$}_+{mcCnOKTv8t`WK%+2RCX%6;_S zZamXbX9}W6Z&vFOMvE3@#m)=DWV6KzdKK^_6XjYiT;leCH71@CS&t7XBw&U7Ph*&r z5@;9X=qeab@d%NiRM6wVk6(_@3HB0Ev1~KXFZR5xZI|4vqv$e-R)< zJ4mCV@=^#0w6l#u6cu5kaEiRiLcr^bsGJ!bJ6qWQpAii>jmWd5-{IX|_NLP8v#O=M8((V1fk$iXUzWaj(_N|eS&j(Wfl z{%EoVbCnQ0mRvF?(X~>03Bd0qpL0VM{1&k&$uvRev7&zR4xY#$A 
z7DH*sAeu@K(DLd~%4Zt7Nh9gbB_~n9lcjqu!8`OTu`7Hl*}mM;=_F+#>)Up9W$8OjCcZy+iB{ zg-7i5@sjGEV1KW6$P~~jgAT*#|G0MJ?$f21ny_PJmF(Q_u~xoB8=`tePvp^gwymj8 zUq>tVbm0;QD8)dPy{j{uYW77LP$!>*LRMK z`}9unf{E)KhJ!(Ndqg3vcGjG~cB$A3y1eyG=pJ=|>xpfJ+R_2IbZ^BX!7(UWIDXBECUI-`@dZs1+GK zdMH#S)n5%fSKUXu3Jn4t9ATL6bsJh;hRs+xAuvM5MAo#_#)@+bi9$1~*XWP(p0k~s`e1ZI*JIFj%r z`V&1M@K~S)69E`%IA&l&AN7n$pt+Dt18f_Q0~gGiaS6@3BR8Jh(7&dA)$b9tgBC!e ze@7GIsarp_M&13RwM^>vWPMR_1UsfUX1&@rV%tgK8+-~D|U}~_{@0~*YWLB|% z|8_2(iCt(BvznkBeyW!;_8yTVj`n`1B!>K@z&Xj(k#Kh9C`}OFeXTvVl!|J`ZR}(f z+bYw`fJf)O=8t&l6vj{xq!X#$^rly7V6vjav5X&pQ&s1qI`yx;*(%>r;a2{T^>JCR z@2i{$vat)x^6{F+=Vo6Ye?L29RX{E`4KxAp5C_iXfWJSiLdVOi{Xg$d&nejDgP+{r zIERkT9*aFr(NFgJQ%?(tc=BXwxyOipN_ETOH~hhBsu#6(hq~){$Vm5YscUe)QfSJ1 zV6sapy<1HCuqEHFMyC=f=Byc^{`hU9R8#D^aB~s|XOi=a#w^IP5w}`z!C(8Eo~Qv#Bj4vNK|?W1)0!QUS?7 zINJWENnYJ&diQV7=Cs6A-P9sS@ju@eYcFZT{cG-h#GKqYjJD#_jHkr5yk66K`$IVh zBNL_(>gqV5%!KLiv?5pbu;qQ`Vqaxgs@uK9C2{|kX*}BVzhbkD!k14}r}ReqPE+$9 zY9rQu&DynV!eZiLvL0`*uUO^#e=x>;Zfd^uRedtHfpGQTv$DH`IHKB$>@AjNcW+#m zdPn5{vvPUUW(CQfMU8eaiMCAF+)FQ6bWb>xAgR~YD}=g^$FN(R#m)G?O=`e`}U%*-jQxIJ+DA2(m-fIsj=My^`<9WbV=c|WJ^N;jD?(V2Ub z@>d%C=dFG@Wzw}(k#h!zx|2nFa~+|Twco3fJ02ZBCxuWy_fm$=xUKUHkTD&*j)LWX z#HJV?4tBvmOg}qWB{?nZd~M-H2OULi{xLtUeP5}ckWM+(Oh?-J`M!GGp(HMWn)Mcs zclV##-EasivW%el=~~xK6E^X9uzTkLb=ZjIGwrxuW^Mf_x%TNRvblh&avq&S9)Zu` ztT$#Zd`;SpdoetOWK-}M{P_0Sx#84dZ58cER(luS%Zn$it*t^A(l#;9wT7f*bCJ4# zcx159P_Rxw|Nh-30VT&(ZEu!UKdTs9YP#_YJY5mfVeGLVZ&sd|dj0S5SWOyUUs^{* z{u#kk%O%VGzkmJBKbnTaAAbe=?~RIH{LU>UD%*#z}(e_wGXJK?xUR|td zC0Mh)w{;1ke=gcme=6TH?Gqjtt?l&tA5>CanL@YN_x#7#S#{r3-j2u7t_r_zCBkE_ z4J?B?BB-T$6eSSOFP>Y3aO&i|0-|M>03y0Wt?))25JC-&}u zUE6Y%qF`<;o}5))ot^e6QuH1*Ct>sMg?#a2x1mwJ(dM%$sc@O0xvo?4S(zPW?)8E@ zrcLhfW%uZ$gdcC36fS2hVh))9%80I`_U-M9o$q+D;N1(mH>(}N_P6zIENnS@H#g#& z>x~zVzx@h+)EpiK6jh&%R(csMrixz9Az*Y>3*NK8%55EA4S~UEN_080hdj z>slM^@9exrn4G?TMZWpr-3rN={Q93(x%P)Y0r&Dgwe~T4t6!j^Uryc+{ny<$n|yUP z+0~S=^+v~Mdb7%tC0}GIkpmWc)XlwiK4lr!7N+Fp-cb}08pK1b7rq|mcA)mfhi%JA 
zTy_QWL$2Mk{e`)+(x-nPq`@znwVQ@1`S(`|{FjMW2}a2C;YFFgoI-tj@#EgUb7H4j z;p^O&)+zonDShOuAB^cZ7uX(uU|<&Avh(}U+bNpm4tdv>_JZfbZ&z;#whnE(+cDWx z;_oIdy>_>5|7E56Lf)MUd_fJK&5h4heuZtwEzNq97yBQ7shw%x?3wNzP>>!Qbqc-x zSI#+;`QPJ~I)btG3@e@!(G*8ED?4AlyOAva%Rfy8dH#Hh84l(DjHlPW`86aLOB5)@ z`?fO`JNuv}Bi#~)9ez9Cxik0uF5stWX?|NQUSJjP4=(52z~VgrTidf%dnFR%=zZ+X zo6`$b8C$``nSWp^{#@(bGK|KEqU(aqs~=E&Erv&ZXR3eUzO z92b=vnu8-(f5$nub_7Edi|rL}fBYC)w6JpXmgrEpIJHqP%e+Ah7WcK&@yn@j6iI|;RU$F8$d;HY@} z54)jxhoN|DP%(EkhpX{*;=n~ZB2qJg^C)cd=l*V3@ehT7Dbg>;?ce>H4O>pJfWkjT zsdZSsY^|$4k&Z}X75l6U^MCW*>?+-Ty`#O$o?E%aT!g{<>$Bh`MT$=5Y4wH4hP`gT zh!>B7vvzeC=D+Ao^nlSRuijL9c~ES_pp;kJ*fe{frK4@b4c&RRA7;0ebt)ftsoHQj z{i)Ww`*VAquaauG4d=qr@9y7eJN*|PL~MJ*W!#eD#Yv64Ys!JOCiIZ%nU}wP^7oS7 zQ$>&~4@MX4yT!REkZ?cGb3=Vp%J&euQh%kY8osWSupyeoV5DmE>uAkWx%AignvJ=Z zZ0^MwW)oeAsG1FUBk~sZHMag;SVY*_&=`E{_>N)>8-khd7p)8TkY|c8F|8b(Poe>C z{>C^sGZ(>&CsJ{zr;rZJX~k*BhJR1*lZl72N29QSZVX%sJ=#7h+j(;aXK9yDBfg19 zPIf}_e9t<#469_Ecy2fmFjcjg7wo9$R#W`af}lHocw5>%`1g}ZnO<;lnaHb(GErG8 z-d}58-d*PZv;`^63VFrG3(yzz)r(nqFTPtmTm|%S#%2DDOykXj)VKcFO!n&|ISR~MXbhakEWk9~u^+#Tsxy{CoMZ{Rk4B_XLPBsi=`oK-e z&)rX7mZoU&S=``kkamYxdU>*Kox^AS5ZN-MRl`|;>R5!{&`3>ccj#x+)UZ$bp~T^= z@*M3QZww^D{TpYnsY0-wj)k4`_Ue;g4iF>$oFA8Ywgx2=_^O3{xjzE_7;uTWibPZ} z=1!$NnZjUK5Mz;5JKtv2z>+7Xb@g=N;(J836#f^i*R}9VLZW{)U(|1G&JS#bhyT3( zsGHART5dgPtNG3BCZcb$R{w?Q>)>}LWhrKuTgtz^R^`a4d7Wxq*c&OQ&JOq{7%%;( zuvGFp%M+`$?$Tz9jcWKt_al|5x_im9!|h0Io;LAtY%Hsv8bzMjzDY}STRo6hX?Y~N zj@?+M?G(P|KMUvn$zi|$L_A`v!)zz|1yJlof>X>mF?Tj^b0|kG??1mH55B7<}As z^tUg2-)I=>u?9*5PJQkD<_9xx_0LWWxG5zvu(IhO~I`@1kxk^Cs+8?`|%8 zITNkw$BT;lDw#W!0;`?7UWA8}{IwAJmXb7FY$QZGiI4ZZ@{QSq3(&sjOeM;$+U4c% zzMLs|fK*#aq<>D3FXPL%Wyt5-%QJ50GsO{^c4KgLFbb}uZs+gBhS?zf6i`hPU;SCR z`R@omo;{y&kV_NTEZD!z`XH2&j~u%;akALh)rFq9UdfkPo2`3m;>D7FfVNxeE%cPI znH77Yy9nKay@!WaipiorPH(<*yRWaPh`XA-GI>v2@peB$WSY<1ndIsWa&*VCab>Es zStD=~R3svXs_gte|7Gi)o0z>(WgW{z9nJS3e~hU_yp)uSQhEc6dD{cT`E&Yxu~O{` 
zf5$Jm5CL!Ro<67Bh!>bP^Na|_Ju_CZPALod>^A5O(SDn!?gAf%h*a}!}l*op76Vax~ViUQq6p&FrAs7=}JQodkdXVto2SF~@I#43iiy zvHW>Wq(xw(?CHvy2)(zzabDyZ;|#LbSo}qJcWoR-<=vK?oA37@Yws7=EQ{>($C+AM zM1l6a3lsFEJKh+u1rW@MZ>q!QR@qr1|}~ ztj@0fJ-a8TC+o8cOe0x!?Z55?LUQ2u{-^<%Pu+yIVnSZ;DR^m+x4pd&l~k>{?1xRx zEWVzbVR@|ZqU-&ClBA3O?(r{|y6(wwKEko4?{Z_cVhM8*?t9bRQY&eNRu#A6SvcQ)Ede@vbmo6BniE1DiYB( zW0L15XZCgG>B7^or!Rg>>h#RqXzccR zD=~`>xtQuMtrT8w{j#eAQ=8k{Jauk|3!n4C{IlzeOus_g%yWW5?6BF!4qtmUx{1u( zy%S?J_9E?1gmoqpHPJ&y}?Xs85(u z;KW6w|J(UCPt7-8+2Qwp+l|8=F_{1IS^92&LY5bxO2|;7)itl`zOo&!jxc)n{E;P# zKtAa_LE)A$ns~GmFKNhm~>Sip>&d#>ebG;FG9(GCOPh|4x}XgkV_hS zJhLbrJKRW(!-buq{bs&LqfFO&IB_SYQoBJnC@I0kb+NSkfV>fx79u@dUaoyimHIx{ zgNB8LUp<#k3h)R^rC?ZSCu-t^`{GWYn97(&$u8F;o6o{PmG7}CABSKmA902O6~8rM zbzo|yM-CGQO~(DU@Pc&Lu$C#1Tw9;*|C3@8hW9{E-CS~qGaa3GqYyKR*a!Ul{s@MA zYFbEktiT0wiI1aFYJrcV;aRDnk-@7(YF^z`mYi%FT)?yJSow)(KrDB|Bp+xplO82K zGj&4DCr-#O@P|qM&h?;W;#WD^31pJ;#KKw_80BPVGaroLRcOEK5lS+&&3z@nuOGy} zHk_Ev_cT^Wp9ecY_ACIOmXTjhh+q7XLFl;9f`AA=6<0q{*TmquGL}q$pMOnTSD!vW z_o;z_hdOVUi@flHfbGOglKLR7v-X#g^m&QmJnEc)a=iGYM;DIe`{ecU)O9;0H>?h+ z9(BZ-QPdfv3SqcCIF`CW#50xUW3`4R1|E2lPK%}cH6zcWm3#yO;s&ac>8ZIrI8LcY*;20zWDO=#X=pM!gbD}CV~6)1VHx6Isb$L(x4%fRpMTdI@cI}TD4Hnm2Qw`77c%4Ls8Q*dhv@RrhT^A`owLI!r9K1y|ALLeRXE^lgv+f?&5kt_eRP(p~q9@|~9mBeBNXEbY*2|{SfXmT~G`a=_DYm)w zY#-|vqm%OG!Cn{9c$C+pcx?Mt(5A^B}uPK~UA@PRJxlQ?fVMctP_=tJB)!io+$ zAux{`m3Qm(W1oUu&-K|_Y?iXT>$-O=WW4CPGBQ2`=`;)QRj_cE(+B?FN zw_2xd;(c3sH;jutR;^XK%kB(G#qC^MPSSZ~K7u*FcxL(fwVbS>9GH`C2Or&Qi%vN) zjcvqjwT{bJFyyS>$v2!#Ke!aRE|LqoeeHsZ<~YK8d%e@J*)_AMvoou(z?36qeaXf9 zsh@Y?(-_E7FBWvza^62SJ=O9n%OrRFN1KVe~!zHH69j?(}8 ziJzbw?G?y*@h-ohd(7YZFyG2=!?2~Zd%g9M_t!#frZjLRK6=bf;u2$MIMI?) 
z=uDJ(G)l zifz85G9aUnW!d|xnTmyGM=PP6j~KYg7ln$#$>%6PG4Z+6bS_Kol;Fq=_Ub;x^9r?P zMc^p3$^+=-QO>Ve_SXPrjRncjO6ovdmf~!9#>dx*ouD*MaZMJwn7Lwh zy028q@l^bPJB@NzypQ?_q2B}S{1v9SaRifOa5`oo*;hhIJ9abydN}=)iGf@gCS`)8 z=;&Q%SM8jB=)aF+lZj)Y&xVs#3=DKe)Z%r+gm@JJ>n6VtOyqno97Wz^9^Do&Yp zv84Y8{d|)YPPPbZg=T2HMSjRdmX+L*(qQta={SXiBAtiT2$C+N2XPM)bErS;z$&S8 za9AW|6A66y6Z5EZnM8FxGH>bVVr{P+;=MVh@3I1-Grc+Ek++WHl=Kl-1HJglD4P;q zvxRbccmXq7QbvvEAKEalKG(umMVF%dks2@h_tazQ zxg29dGQOyz%&V!eH{SA%fYAp6dR>&cLrGgW%2?vs!D%?|KDAD3O$|UL9eYns4+Y(m z6;gO&=(uFXjBE!=Ay*|kJ7|S()Qo89=5vh!Dzt(r+x(*;(zW;y-!fcPRGH);E@k6R zbv~)o3|Z{aIaEg{m4Y#tDq?RPty{&FK+ypM@NlpS!g*C23E0uP+WpREQ6qYZD@;m* z@lM2pRARhD0}hU$@$$H|5%SX{!XQFcwDx}_`P?AKMqIishgb&j6w)O2m6daQ5VaKe z(@@(NicC{eZIWzZ9()nsWTGf4fqbgT{tsNO2!D_VK=b@u`2rE*FjO>N$sr3=Mev>D z&jYP{{u5j5{6}T1`f-JTx`m3}=yFd+6R(!9R4Dc*h9^2KSZnPFm z$18#6=fcpiqt%0yjMg4VctT+vO+z>li}l9inc2?(r!2%3Dc#+VNJ=2b1LTj;kEXZ~ zAIf|?&^N;lrYB*SPOhbK%|V} zChNzkV=sYx`1FHgxH4`G3eQ&=2MP+PH@6I_5z(*&I}}RoMw6j+i2$U^sINLu1>gvX znv#UAWJv$742|{{@&fpSl$r@=5P;TAl_e;NiBBe=WPl4&cjg4~trfZ85pjrLQ(HPn zHREHl9wE1ca3>xTR9*!c5DxrbQzf9BNjUUqUURxAqK(pxq9g`!v^q~9XJO~oHqJ!E zaKimKC59Eug_Ud+R}e*=)OMs6J}-vrPyB!Cyw!bM^Jc2+<&bqcs-dz4+(F0 zcRn%&J>DHuBjQ3i_d(ITkC7R8I~*~jT;#F0D^K^)O3~f!#AuC7#5LtwuwfbxN{s`G zLG-J|(}TL=LkLR=b&%mfC^Zvg1Hx36{M33xgIdu(u|H9T3VL_MyapjM-adcj;<0Rs z6dHsC_Cs>|2^Epv;#vs^1TJA~cR2Uy+cB@$`$SUnEvUjEK8-xe|HzSu0mJH7FtCXG zta}cDtAkX*K}C?j4I^6(x^Hm|sMV(>UjnYsvQ=0S23JAWC2^>QJQmPN3pfOPr+sm( z6uN6S%;iYUgpe--wCR~z-SPd@#ge;&8wYtt-BXFoyCl4iP;#H0VnZZwspFxvn0Fi? 
z{`#SQ0;;5L@;(3yEQ5vy5QgH7#RIKDbSullQOeu1|hl(1@nUt0&?9}TvOXK1d$M$91JtW(;I;h(As(s zlxGrg#}J_Z;0mIp1)xs5k8np5MMfYA0QK3Ca^x)zG0Rzf9)kK4QWrR*{X1*7i-=?% zv4fUi0+%D7eUvLI(7xI08F zYgZjT4*&s7|KIb*A{SeZ+(eLh^jGP_D8HA$cF{40B!;{vs>sc8a8UM;=^nE@+k|Tmf0gNGqVn@J$LCh+`R=8D~`m z26Wy#l)fA6I+!53R!S82sSsRF9<3@Ru1FI zq9Uqr$_NaS!vX!V%xJE!NlD?UeD4d82~3hsrg0hJM(Q9KrPTKD@IxG}pg?+*@GF$_ zt_D63Efvo-q)o!POylucmWzZFzHyU)k4z{Hbp*y`tZ#FcLn(2rq^t(VQ^G6bqZtzX&dapQ2%nvjxFPE;N!%jsX_TJ(>x1KmudI=_GiNQ)2|-jt+yG(h$G z)yy&=$M)WzQGm!69p~~+%`WryUKgH~$qq_GT>S|63Prwc8ivV}Eb$OlM~4(>RuJH* zqvao&IgoAdy~JZNQG(gyd^{3Tuiq(1esl@jax{e{Py?C%B+XI6-}4BgC43}-dxL74 zZk&%{aOR2xQVeH^$2oKXpGo4815$6?C;mP~NVk>Q2<0e)6_Z~abLJR3u_>bUn9l|Z1Vc$fZcfMs2dV79-Irse>S$4OMX3u;^jp;dCD>z423b?Is3V9wVE4Z= zGFeruVfqQyD@iVHqh36nf3;hj&(3nz2h_|mw*BHjkj8?3R5mPFALS5DK!%~GN~RU8 zx2H*?#V~4F_z0q#VdQY*37pCB-M%61p@$JFCba(4O*-Q=)R=lGDd|2rGmFm%Wp8$e zY?xP+g3WDg&|VH>SGQ!Kvq6=+B6wGsj*vwM**!~qJgnBtJ|7Asc@73J%C#sjjpAK)AJx}`i-#LN1@~Pkjuo z6dCCc^=Xlq)h~;)xv>SqDL5fKgyTEM~@%3t- zer{f^zwK`z)}ae&_kC)&v#LVHw!79iD_y6r#M-m9n!o(+3>oxgwkW>tEAfr*dNOaB z`Vv90zv@jy$83$F%BtX6ZW%G8!8vk0;WuY6+o`YPn~kgW&0(o-9Xo(FWn>o5hxGNO zr94CT!!=eLvgIJ#JN}%_U|YRvH_QCSye^h0-)wgp#lF4&e0KyCb5wIyj;aP{i?WL+ z!e8&)@^kWMHx@ElN*7r+^s?yuvy+?2Xb)~y{@|kOV^_%(YZ(*ZBQfNJPjMU0T_!-;dKhILb z*_nNC=gw0<$=3941ivsp>COA{TK0ay?(<>F9x_;#^|Bp(e_5pBZ~>V=hsdldx9(j9mroXt&hfi2Vkk-7L=@?DqVbVO*{Y`16bW=<(t6r=Z*=3ZwHl0;6Lezq*zniqIi0v{ zk*#X6@Es=6ty>H8Bhs6&51!Z}ggbYWYKN5sJ-Pkn0CYF8*vp{Zt5#*!5>`|EN4xpR zyzJ%Ah({j?|NGCcxHIABg}~3Cth_x4OL=-Qcu?f^7TeZ!59Y zm(LWpxN2`08}{>zKGTr=1)aqP{5B|=`W*C9DuVybn{UgP3qi6OF?+V!ye|U}Wupot znN=F3S$*PdDj!cQsKD#m{EtPn7(KJ=pHEZ_^Sn6xxjnTx?epEpyW%}vrKUQk}y38|h7L!yd+mmx%-haJT>GZ}a$@ z!-wDL8)|C;_I{^L)V;1`q$0RlK*n2@$bil=$%|9Tj#^g=e|+HF=< zGIH8vn-zMJ2HKkl#%lbtIu)AMYn)jc>I$lxZsXkV;vh|!_O}ZQ2Y1w>4f2h@96VoB z9ItsbdOfx@Jw0_5dQO}3X=jRUET~}?_F?~4k>3J%aTU=IPnnkOeVKJvnj+ zR*l<}Z<2SuLH1Uj_(_>*J>%njnU^{c<$5OMC+(XhCMK~yYi15pC7Yg-51qO?i5ZSi z7^L1K|5?fzpY2;bsdVIh7 
zw6n5(RXzATc$t^ItG8QxbD8^MZQa>|Vd$N%P4B(Y(yXFl;j2~d#z`8_AK!aNEN%Wo zNMOt0a;2Zbs&&sSDvb7%im!_dM&~nykLkT3cQ;LWU<#VK2p@h;nHDjbu~_RUeoFje zTlL2M%L-dt6D|_%u*$_0!btmx>%1OH?L2FPBXFq)6K?%&70=WQk*P?7h!0e45Qxnz z(wuO$qwVv62&+mZyHLvdyVc;Fzd>f+PP5r~&Dd&BnZ+ZeVv+21e3`Lcdu+TwCj4d) z>I;Nej6~@175m;}j2ykHK8fhd%>G?R_2snjTB#(vHs`%tl?^)cd@5w_n(bwfyl$TD z_V+*D&H|H)+=-Fvs59}%PB9T@8<&O|zbrUuT^?Mt8xvImM;o{t#(bmxO+7a05SYKr zq!9DtMuq2n_he9!@S)0XTzS=srWP{|P1Q;%GdzvC3*2m)<$@JP#@$)gU3z?=qv9yU zfU?}SDEvV1TMMGcC@^U53B-oqkm)oIa>hZ32^i4EsLxGZ=mym|)E10HK{{{zT(UnN z)0w}0_JHQUxztQqUU)JFb6;KvVy|2ipu@s7#%ou%KQIg=7D2Abp*1BQpd!WpNy|I{o^Uz8CYhQ59)6W%>8@aK7K+RrEC z9aD;u%2OvZ#0*8wD{kQh6Ki6ub3R+a{?v2Wj#0e}ADb-Rtb}5(D&;Bj!b%lZQ3hiC=AEQsXpL72_EuO+u*cpAM3pL0y5qeJiG^+S>MXT|yVztL;UG}49 z)S)N(Yj-_s?_-t+>aDOcwCe{Cy&?>D1AyYZfY8l3kHPskv@GP?fBOZeKVJM8bHDPc zC75sV>U?wTYCCk=-{S5ct#jt(k4wzow5I^Vlgy^}vp*^)-{U{GDgINGe&nqU zCue!AfKMjU@pMN*Q5q7@4{fc>1kd!WcRH%LQ2XuCfdNU)w z@yjE-LjRLp-q1&93Wfhsa8^-mbz2nfPbo! zcXxLSPSEz|G8jDMB@gGY_gZs)bB=W-#_P*1=768owqdS1c57>9l^qe|EK`fC0!>yc zy_Bjr1d{lU8lcu6Y_x>qcb67_oF%0=)+YLerdb~NLHdCNjB zNK!7%$znLMU_DTE%hcD9nzfdyzoIi_XNs}0Z8T=DHNTDGG_8J9 zQ_~o^^J`UJP_nnw(z{PJs^eU~Cx;(LgclB0DF<7Zv+Z?DPg^rBMWf5?X-2U zxdXtes@#&-;lx!R)&%LjmS3EGR5%TaW?+iU&j9cKfRvGJIX^s1et2!sQo4b2Ze3Lu zA^BF!_fsG67qD4akEG#^-WrX8X_>s-k`Vp-Jap@oCuZ&mbBWj58is5E{vm?~dQ zfSs@IM^!vUD9Fhz=8iS@)P!zKeMT6L;xH$R%_f%mUnY$)k?9=jl(YjRfVzp*O2IoVpA$hO@fiA-xQIHckDcP-~Mt{99P!HHP4yo*b zSW7Pm18cJvi%59`uU}0qYIQQby%1V2wTaJN&A!0c;kFt@xmm`|pPO|mZ-2JiC;0qV zdjrAoVoZ@Qa=FAvnXWBfT~jTiT$tl2DBSyz7Pc!i^@43x!P5&rQz9cGIKEQ&7VKrn9fG zE-T?;Zm1(gvw+DMd`vMU2242lW*Tu26!+3j#A;`P1%)gUUZBYatQPq@ymPZMU(6#z zIk#3EZ(;QWjxHOX;gyXSnh#G7&F%0nO3`#o-`k)X({IXdy!rd&;KeL-`r;3e;86l{ zO6#Zlc4Kdi@I&FeF3y9?)`+FA+pet>VQ~T5uDZ9wh%AAg!)EUd1EZq{Vu7Ig;+PZ} zkV{gB+72*sImR9HMPY7?ui%toU=Dt+j{W{Ip<`DE}VpREOI<(k=boeUR(FFgVUjrp(gat9XA@; zrx9I}HPbvz<~+&D@BqRZq`2pD4qHSI{#gm#lYBFp<#Ppe_1FHX6oMS=Ok?CGxn4Uz z%D6yg2#;XRRhlS`^po9a5z76ccBm}_xO(~y`qE`oP8ZQN?w(qM{U$sGM<9hB2&UG( 
zSVyrt`6CZ2r0-{2IY;L3#^Ut_wA<3x(yInF^=yO8Tq-sv>R^_MdhQh`!=~{Z)T5kJ zYQ?5A!POTA4m>hi)@^pA4Y#MO5HN=3LWlB#vG|+&~jHd{0MB?dm3G3>8{TziUzG zL>$0G&K}O1j^RUW7qlu3wzP9k}vod*yyQ~!v+&hOx^1wPN}&N zT)->gjA6Rdzko|U%pVolsHJYs)oxMSEAYVfjb5m)@>@Xw?QsUdQ+1^U27jXznmAyy zE2|neUu04*%XN?@m=Fsg1c8^fPp4can9lK78)aPjbzK2@ znS?$m{)ym%b3@M-Q^R4%ns)`A#>e-!oXwpP4wOa2p?e^+Td`9|erP)>tKkOeg~I_$ zvk*@=&3Jd>NKr=7Ifhx&Y4JR)RdzuNCnZ71Ae%{-Y2-WI`l-#R;CWnD@nQq(pG>-vzsH(oH4JZSe5>&WV zO#G1#_+6aC(5n8g%OIjs2)snLn0=9&x(r_V&FbKkyYkz2jbw?ZZjm2q;`h3F3>PPs zuE}k^ob*WHAGUm^7A&Z#3QVu;BnNc9IxLX@`j|+gbhI0Rm+~S$e(MbXhr%~Ys(90% zkj^S;74(d_#&8dQlzX9J4#OVI$kC zeJ%4{e8c7o;g#k-0Eq>fx0e~= z36bhy*H|XFHn)g=TIr`}fG^x#)t+U@qRtUb4C=0IH=r_nHs@_x*j^T{5wePIY*?Q> z%Nq4(5{XkomvOmcyPs4z__1DLt{5fNbV$?{uSyBo2i0Vl+fC)YbiPrT^{Mfcd6*5Bg? z!`OD2UQEZ8nL#(=-cuf#Kn22M3~UPV_pURFcK}z(sRaTh99e+^wkiv=AMi&!J3})j z0zQ`a6Zw=5=kZ^_#Z|n@mRn8cD?ey!SW=q0^l2tU3fS)lgl4jYFW&<*({L$nx1rK=m{v@2@olG<#O%DEG5^v*O z0J-zHu-XoIS6#`YYHrBzI+-d1_`XcHj$c#Rq|ZAAE#}RR%T2Xr=vAnuCahuo@iSy zTQoqu-|K|+x=3y|6P6)f^f<8mXpMtPR$@K9iK^#jUaSS3!4_G%XZWI$PZF6kLN&*| zPWM!ic0;2>7Xc|RD=9)z=u}3ZTlS7waX*}Vq1ftm>cH7i#>KGkBu;=_CqusuUB$ef zY);;AXEubJH@pU)M64vc+3Pl^z$4hr+IEQ+MAD0!j@5wfN({P37mD}z75M7!pQ51g})5V~x?;|n!&+E?^45gxHZlmEV?{8WMBSmuao zKr)Q2l(ZpqtvYn+C1xiD*dt5+ttcTm6&#io<_sAejPqmRJICv8Y-f(T9XZ`M@%L&n zs_=i*LOqY(@jAU--vped=7C<&5QuM7$H3`q6|&^myo@$lEE!pe4OCzF#96XEW_2nI z?Y&gmN~KdRTxn7dBv?{g9j=C=F?9DqQRY&Tt->vq z5&d;G)pC;a-Z5n@;i0KO;UH)}W?A4c=uZge$Rvj{IdEaTzD<=*b$iVIoYtnzPGjU# zQsa7JkIJI!#c^UIYgt0|+}%Mq^fR0{%RVfFUt@v|KVl{-9rpgbAX$Rq9E}&dLrP3n z)Zo6xfZ70+`Yp{soiwd#nyuo-A5J+S`J6$@4y;aEgIj_dxPnbH2Kvj8! 
zs7BjQd^wW>*{?H%t3@TbOFE$o`u;pYZMI;`WAUL>m`Ae;&|PuPIhOI9*{bpfgx)Ty z5Y-pH$6Kb~XugGw*H`*v!zKLyRtn?-b{9bBEXMsnu5wP5;VVAB*xmsu;&soorPA`3AJ_PwQ7TFc_5q#U{xuc%G1 za6uSj?(`VLMJlx+Z%=w#` z0$9!~&CED^Utgv#B?g=&70P7h(&RFVPR!!V3B^XRH*6UK3n79AJw!8^JK8x`(fp8O zVqA!s&!0ysqrFm#c zk`_K0Zl4Uz!uA*~p4<0-Fio_W4bP&RJ;)1p#z_-sJDPZBsNLZ_CX1;pR``_WHrbQj)r}D=W%%FOu~L_MbWOP#ZaTF_q#MMiHudatshRj zB8ZZj&FG%hbK>%6Zs?cFBrBJwl+&dneEkg1=P2_=#@I_U^9|LW2YXRDx5G}#LaCMm zIH8@gf@7b^$g*%kKyZJFo0V!t!0^bO&LZbjyIzqqdBVJ5W7KV26+pT*U@wFVKl_Ox zle5AT23>w^zB=hLcv~(k$t1o`Tt)|e8{G~pM-Bq#ZW6j>QFojWzeYVe| zGz!Ew_pW9;aA7RYB8PTNSV5P}e;Y>sRC3u}eFwr+M}$#h)pMJyDbW2SLT>v7!3Bjd zUBbgWS~U%K7I7Srl*QW4+Ww*U&%#M47zV7`ZE}t91=}u@hNMA<%|&Cb!6K&!#Z4~Y zvkycXPXji31VgG=AGj5rLu-)6LmcDOn29ciUm6PDqOoCtX4wZRUyPv1IfJ)dwZ%8p zg{n5vs?0cbRHUR^=G+ALpF+g-ASt#|&^!2X;!;a)wrW}CqxY}FO^b>ZeXRwKqX#;< zB_7;c9xxJK&ZmEa+KJE5Sm5wxst&m~3L0JRc}+xW%(&6^^BU_U*tQvXRgJ7!SQwqm zk>E0stEx%L%^;pWCWoV#b#`R-p5r<^H?e+H-})JKf0K%fTDhU9b5_sr$Wu_^YxI~c zFq6{O>mz2)9a}yEMUiW{K~^w)IVowMwafZP1|!XX#T*NzlGS{Qm1B7prm5K28!Ec^(VOk zH{H;b=r(&;14#~cYF}&)f7ef122_DDa?#erGzZ2xGE1W9=rU0fnn+7(AG=d$qxMg{ z1vW{q$xnnXjSa)VKbj6&A}+8$olibcUuU0NP!0}FIYc+8+-$8K(=|1Fr_Qy$nZL*E=Kvgu1e~>+ieMf*9adBCB0P z3P*~t)J;-j?TgfE=H!bj_Y5}jSu(hwfdh^(^rW6iceQe*PvmInVSq2*T*gN&1;~}~ zwbk+=7#V|eVS)ip-~&{G20a_4Q^P(rlG)&L`IOWIRvr<9(o`N66Rw~CsTUAI#8Yu~ z;(ZSi_L9T@oR7A+zVm>5n`=;8vH^?2lS$_1 zQEpwoHB*+f={ohaK1ubHv2ImZ$m2?+6wJPYi=pjgDXTa+IQ%VEPul*!Ex4dM!cZN} zl&`z(D#0dCsz)OahdH!J{WmhgW4E<+*SB%490X9Fu7e5~R6R2dQfWgtQ8 zIR#Z|Ow|&g$Hb#=;cToSDKUc%Oo~F|FJX)sqXmm-6O)?lrH(ZW6FI+ru5@>I;PhBB zFslc5=(SpyRkC2HFO@(2WTnF?<+K7kFFHd1S^F)N{iRX^dN(!0*YIN7zI0A|c)IJ9 z{WIIuM)0ZYYVuhq+tyglQu?3xQ2m)QPR1Cf#L=vA5F2EU2DltCcz!;3HLjx%d#E3d zE-1B@zO)xOx5ry^^#sRUy7Kd-n@68%ByeuSSlITN}%!fW5% zv#_s~%^{Xx6e%-t(B`?uMY0@(DGlzEkmy)ze~Rd>j((vPB!<}vFwIO^Z$-QhcdO%I2Zr=uLGT)KQj8g^=8BlzHrQ%HOVKI{`PmDv}(XKZm zRtSOu8-1#}?VL4pctXz2td^Hxxh`bpsQJF5W-MNhazNb!;;T4CIC0z!dzjoGS4C-* z`D`02I#1zUCgLM-p?dmThP6~gJp19;eU*C7CRV;w1VP33F8g6v=wc?yCRVLP#FgU$ 
zd-S&q{TxNQqOS($1-A8S;nZ)L9Gh4}dEX?ai+BZ_*zd(6g2@lV8monBHnBak6rf^} z8+qS51gI*!s+!O__WL$Pc8>^WfUgDu3>gu2{2n&^9(H`NRyf}*!@5P0-z6dl@-=^c zD7(ABe!Q50UBtuSU*9al=oG8pAtLVW>wfbvynmrOvPM0BiEvSV0nnU-& zzfdlP?KMy^T@-HEyVJ~W{@JZ1Xp1?AYI@?@R@L#jp|9EQYZ|0zX$=e-F2XZYeDs4= zKInj$fZ{EBNds@&aIZ}-GFSsyGjnZl(`@I)L8FI6+R5(C?&w{aZ9?JCts6(0g1YRj zvA>{AnLYS}Z2e^%L;M`~!(*q1Vf3~uK|uk-=rEnmgfqKqz~^G-gRsT=9mt3}lv+2m zFjwtsLE+TPT<(N4FB`FbkO40_&wXw~+xko2jTDz7el##>4`}5|nVLd7ELG z_^s;hBNxC%@p#jimp?uMu(R3)N*fx5WVZvR2#NaZy~?+)v&YVk51W;MJlq0qfTNVf z(fviCnW4cRk~{YpRt?*VQkzgFZ<;-Kd*7Gn>=D?S-o7C6rA`}NVobfF91c9`3v79ET5HnQKv}Z%! zsWTXTEBpq~Q0mo+w?IE{)M0}cBSPg#c{hciHhnASLM|G>*#kk~Il55iFV-@`IW+5C ziu;RXREho++sl<_<=X0>6oL&Zmy%Q$4iS}n++Sb-PM+qV;;h-i@ZUERF$dB7?57-y zO~AAfQ!#aSEXVSB7utqf5<3g0qsg07_-`AW;A!faJ=0LH)5t#jBz86S9YYJuR6ee% zG=fKNbPz81Sb&l#2W*svs}i_TmWM^@-bxI$#i49Q>G@0=@T@f6f%joHgQe8uV{;P| z`2eBfy&9v3D`RKT_nqTnwboTgu4S!k4jtYj%fG(|#pVeXPwedLUUUn*`QPa)N`#I6 z4qUzTU2dA3aa(ox!h3FhM|QjmcRwcb;8S3zo2)pedx({s)P*aLey=dWt|%*SyeU_B z>>XZBFiIAJ;GoEgKJ=%+F@klF5ndd`9Vx(&Q|%C$gW%#^2P={IxZHF+eGMW<_P=ba zJhcop`?hR3bOo>d;MH-2{Wa!z(eKr^e%!#0{o}_JaD)Z%@UdlkCd;rkk>#-6q?5}zjJ*X#W{l;CK~z5 z4d@ldIUWw0WXm4?sea}2RbaYuaLG|R&8j%4Ohrjzp-g;8owj)s!&_@f+EIly_r%IO zWFhY1241?LCnO1X!5Pl&FSO(%#pfM6>#F#g(v^&MY@oNoaA5Ip_-3-3a*Sx1(UAmK zo=1pu-L2b8$q?rMBh-u(4MU2@00dTXaiVVMtz)?C6f|Hbl*4UX=Q1g4o8lm!VZ+Op z{6Aw*nAT?9ld3mAikKn`I97Z9#vURcz{birk8*I6u1KGR@~T*#h~gpcU6M+Wh?4A+ zffZJklc(rO!Cyd%G4?%q(p7I1%IUh|izi+p=y}pcd!@r*pR@E<1wQn{VYW!8@hKn% zruo7(`j<(vkgQ|d%%Z462Rm-5%FQNet_GVE-&VvF`Oo~PtCS#UtTZL5uNGCk$Iz91 zY7eCcB_8H2<=7yfZ8>5fywyQ^3(yQbHd}eL1n;CzEvHGNDKR`|!x^Fxs#G@oy+F7l zS>@9#;I1*kg+y*KO2PwFnpN0>Zzc9lD&ECu#WSl!h}|C*2$f>w3kt{6R)<}kwZMB(g z4R8p#npGZeU~hS3j~)BrwIFtFR%x2vy7DI8Rd}XNY~T0zgW^!4cc+%^na4yR@=#Qy=^*+ye{eFqcf~%C6gW$xuykFG!b;0` z;o-xG6Aqx&TK9Eo;xBLwG=OBwb<6An>l)|G%#2}YIf8d!k8d9F(Xu?ACW0l z!Yy#%Jy>+Yx-ou@k<+>vq0Ev@T);spqQzMRL510*h!Rvb0=bz|ec3AKd3;h6;O#G0 z2MV2Wc;S#}oov03Y#~|!7sU}GI|YkN46wJ0cQTV?wjn+uet(*aw7%5NTUn+>YEWMi 
z-052kub1Yw$j=`QuCgpT+Z=aSwE4Ecs0IRXg0gJ|8%&t#mzUQjU71JIisU@AV;|RF zJVe3Cq%tm->UYXXH-D}g;r=Bp(EQ0Yp6T2ofl5yyB$Snvf!LdCOFV4CztQ#DwL|8@ z074vK510P2q_$j6&(_(9;%)(aC^Un<#L0UiF(i=E#PUC*#;Hd}pB(=2?Y(4(=@$lW6F)5*N8SLwM%zwgAPEY*dGUy^+c-GW2J zLVJ#f3r#Cehkf-EZ3l}VJWXKz0YSb*#7`r`c$LY0@)W01H$^6$9e2zsov&$qia7WK z&UbSStYmwY$K&ewTA6R3y3Pj7?cN`3C&RqCPKj<=lHvFI-FX2PGF7k^Bec-b{L3Ot+^{ceV5q|fedY6t;9 zm0J(6qxisKCyq^g9i7XoGDEyZ>qszDG1FMP`iZ-EzNp6O5aE;fg8LmNpmkZZDdc@B zeIiOqp;z{W!v;&kQ&f}}r-~cEOcuyPhwbLSU09TsP?#yt!|DPj85g@c*ytLds1;48 zps&r1iy$L@q%QqJNa$`^0*5)=`|w>qRu*Hw7aROdBSWZDu$5^gpHq{b!hmwmZS@Bq zJ-U4}i%6C7G;vSI6Gf(xZt(vGDDFV4e{g@7uUm~-)WR}JY)pTbuqfeX=1Z!_o=%Dd zt9|>(u8uT_hCtGrZa($-d$AhIhqg2|!%=1s?ImJYnsCy{et0JZvLSrV6EBZq?7QL3NbrC_bOc(!E&g^16cZ2GsUeebfwvrS>lk3&ymW0(<>ZJt^gF{%TYzLRDGG9nNu|&lnO!093uR|? zGKjvJ`u}8B8ZAPs?fE*`_FQq)d?!k`IvAN_WGfR-$YU}-vKLirj%jK&d)<$1t>WN{#Kdmx#ba+WeEXi~sO)aJo zhy3-we`LSu(OpRzRborcxHL?gwn*h1EWUl2bL6F2LP@tYGwC&TU;e^N?iqej!6C6E zs^T#fUQzwf#FH8$lk+?=s7b1Xt`t3{&0U&ZcdhWj3?o7-b!3Et*=>iEx2dfLmSp4f zGI%z=z9_x*wWJnXM9GW%pb>x4p$e&DbFTYer`EM_m{CTi;u~D&L&kp_Mm1vAjz%l| zhpOt~SM@rex&aoPA|i~lNZ>9~;Fg#p$xQf>!yKAA5U~9`PSCp zx*1nwsX!Y8JqvJ0(~b9K({#-@%7~&Y=Phi&E2q` zQdYM2wK~&HTm7M$w>D;R^wPFLuk~&z{iryzY@SW_ens|E1C1GKdVYF39WrVV_Pg)j z8E}w;`%HG?47c9n{X+Bv8+&R?XUG*YpmGLdafU{Rzx##!4*r8!M-=Q`)hSY8VBcLy zVkf({*^l)MEg0(mxpK|`={--wfRE+r?)go%A(5IDh2-68_CJh?KGD3bHlV183!2q?fm#_ueo=M}fBTfX z5_}2rkFB^r;|0^=rrmt|xbYqiQl2K87~($s4#H>0?XGUa4*CKqSlL+*f$qU+PN65NxW1jQ2d$r3W%tZG$HpL5il z*p&6$MrLLw$sjt(YmQNaQJ&I?Ubeo~*YV9qp`+zDCbmo%rGCL#ACGOpau3ksS~RB* z#OE!Q#=~+1J%a$IqY>O0&yyM-=>=AZ^&P)m7f7wd+9-Cmv?SJ5J zcV;e8h|w%z@-&p9=`Ic&PXY21uhac3?8iQ|KOPC$qYM2XG!d>Q2{~D==}H=qQ2&V^ z9GVPFGZCdw&Wp!u#6#Bv6cd!42y8zlBLzaYCHhvr9A_Ru4y?H=~O78ZZ0zxZfo zE3(2-sxFb?X3%oYb{r%cg2^@9q<(CH970ycc z3sw>?Najm?unuKxz97@8H6{vuFHFqgeMc?8e9%6F6#0ph>ze zXpN9vm1dTbd%%)^OqSrfRO)xVqKICQNL1VRSQUS(x~YoXgKP&ya)I6stB%!Iy)AIA z*C0p0X%JUSmk4WtbojNVBJ}Qa^G?L`RQ<1rqa0oodqz^e_uY!R^~Cqo>T8bm9egXg 
z4RVvor?K2rn%yZ($*#gW*2=WamuRQ%E$?{*sX+n_D;`=IVY|;xsZu~54^#ec3`y56 z%_J0^IJ{xMR(}2E9PuCyBw-p%PgQ+SV?_5MlkcVn#Vxw|vd^K5TwFON)|B%z*Rmm4k}BL8Y~z*rc2ltSWs)mOq^oE~MS@%7Q_ z1S#d^Zv~&l1~a>yuR~px>dlS&UzX-$V&p(nzp8!)WS5+PEg-je(X{D!D&@Bss)gZ` zn$IY|)f@Al%YH2DpevK@n{M}cQcm^$sW|by{3W!9^!-vHu``XKdG15r$$=D4Zt*v{ zEZr9(?m1~`mEm<+0kMijdLx`b{&Yk)%+V!o{7ohxmhk;HgD6$sKeJRWb6KrEsmv@i zt7dX0!NK9Q>gdc;2?}}tiZU)Hi)xu}O|pKM0d$ z(3T7iD6^`wi5}Zg)3OuS~ zV}Npc(#b-;F;{eHr2Q8p!#7G_fw-KYYbn8~-Lw^QvyDILJolf$rcr6V))&4gT1OE} zL7|TpvA;ery(J&+NICc;)M%9P>=XH`Kf#nP1>a9jdLOPZDo#a4@W*VO%q2QspA=H; zKeg(C_?@|xkV&Qm#t;l&>to|S#_KxuohWW4vT9@F6Hh!QVH;qEyApmE#!td}xfp9c zq@t#$fxAYM@_6nuPnR}93w}Q^gh{A&975`2^#CWS0=%IymQP6gher;1v@SCf%GgI{ z<%8So;O;-D8_55hA&iuH?0L?=Jy7{@PFIj+NkXp0zO?yJzDUWi-P6f)dGGJA(PH}g zxJ=1BJW;Z6e^lcG>HIwxn1J=kl6l>eHlB`x)IOxB#BB5if581Z@n_D9xmvJHbyZWL zJrEptj%mv#{Y?%ltm6Pbseobi!u4|$*iSF-Lm)B(w^2mkHsX{V9`T(`2@oqvO08jx zEK--3UFi5pE@#I)47I`8a^(JVgX*~Jo_h?FSJs#q=#34b`3^x0JBX!>$4xu0EylEh zN|$6N0C^^JaNGUjiHf?ls#tJ9o7>~i`dmDuXkGP5ce_tC^%RleQ!zbTBIo&l*zljW zD?+n4lU-!O0Oa)K79ubO#;+?!&c7)w5qy{~;;rEoejB$MLyvAw#!9#a>k5Cj2J}A? 
z3lR05iVUXi(-%a^0gBGeH$$RVjQ&ifb0h5F9{BeII$Ak{kx)_OWE0*82Nlr(n|X3$ zuFN8Br@RLLYc_@!hxD7EJ9`+>1l&BYHd8oi-B9^{8Q0*d`|syu>zcrGTl~dO^nlVy zxj@bfmfs|)s4ULIAKAP^A>S0PipX*=8lB%MU?&so>?BT~J@BL^>Uhd zgctl1MWX9g{CkrrJ?_U}+x8!=awEljF-A_K+k{%r=}h%YJfuG*o8Bo5CNE3oC(~e} z$CS!^O-I0!@Jx(3O_R3mz7jW56q@M_SpK!XS(9Du{1-&5zuICH)Yr&YjNsayGT7en zIU~mf-4CF9yphP;yyq82E#ns1kz?t{8yt$ZlkUMZo3I|`3=$_f@;42vA8bEbXJgvX z@p8RXiA&vgq0d}|G$?&_$O~AfFCr*Ygg$)w<2n=?*!oXPxB$Y|z~^y}ct)co7DcqY%pT!$QzjGn zxja^2i2j(EXE4Wkg^jvM%$>wIhV&ie0|kkIiUgYhk@jzblFUq$G)hh$ERKX^OZg9-y-sHwN6)2V*Yqj*=J-O zi4YeEm?w30)3H$ zd|lUdT?(U8yomYt9oN?#O;)EJSoE2YgNpA()}Jt^$J1>B^n@^N|GEd}j;|@w5( z95?!TW+z6*u6~>BpLj?GJO2;&OtdX*But=tv+8a};Gg_X0M8OpAjY?E@@eHn`;QCc zoFyu^Y4;4bS6AY*NgmetX;RkM+rx^{cTTEmWm4WhZ#@5dv2eE^&q7W75Muq`4v*ES z-lsmjltpi~?fu%qOKhR;)YJj;^u^Vr)q7fly%a^F|=43HV06`?iFC{fi!ZzO*j)MWT_|485u zS7(V#gOqjD2*{yiJLX>uhW^`4Tal7b*Uac$-p(Kwmback#tk5na^i}H*)3M=D+MPL zddHuBF{ix~3I^7e(f03(XDvfFM}yG@v^Et;Jnm0k{|R~GSvw()ZIZxmyia)!P6Uo6GwBiwMw zOKDMWzxkxw*E|EMN%^VBX(Cdmb&Dk5Wl2fiuTF^ydmb@KA8qgmJFFpsY13ZN^~f}t zoMp^cv8I@SCgeXCuu?0Mp#HDS&UkS)!<&YoInBVc=Bsr|e zCqD&J3KlU&!pQB2i=wuy^}0nHAdJ@i&2HSCeQ{Oy7$}93>>nC$?bg8S?L+hV$a`p! 
z0;5|@6m`Ll4^F8wx{nSzSKdoWcp+WO+#~O){>*<82r%_UV7rWf@ ztmyA!!NeK7qsf|)3sqV*@rLwN8*NoIaK~~b_)$dUSCz^snyo9K8+nuY&jO9q`_DP~ zOfdD3A1ZSHWqio^1RSTEj6Pp8MWDZJ!Naqxvpo!^DajldXG%~|^6^v=H9lCP>dmd2 zwOELnImgLJKpLglB%xdXZtzIF_(B$z&m$}PMF=v?Lc}R2Urqb($6NbVsynZN=TY^4~0ls_xK1WNBVRbrIIB|bF8~ky-w~G~pCD9;X z;)tZro14@bEsHzZG^Ys4xPys$5qps%8UuG)lUH{wu} z#jx)`Gc>V%{C%FM=5&1*#z@qDM6}E2{Py^=Cf|o0h=7$=ar0}EP^LX_ND(ok2jv4< zDqTLkiBo9FK`UWBvieCG*~bq-WBZTmOn%taF6jwC`cP$4MNB4#;E>5lB2AH#?2mqa z2Ni9?YI&&7Y>@$&E$%TNziQMjE*1!k3|7pYVB^qndNyF|&If$JO&^+(#aKTTgjzPb zk4TT&?(*bp2ccr*DqS6gk!75DMcBK<}}SlTAUD< zvKnkG5$#}!!$RGo!(vb<5lv#rk7P4Z_aoGaGruhB_@V^caU|r+YYbPA7g3O1(5&<7 zn$J-;Ol7IHDw|+5dtcG>mLjdmAoKe|gBUPfb3Yw=VuR!31l`3DVMQ9=Pnl+KpQP5< zp8Q~3fcYO7=$eC4u3pqF-=@^e(06D1bI6A$mYY%)IS}_S4&=96Z5=Jp426ySG-SV zJ)IOOFOddU&I$I>lGt|dG{S9-uuF+q1ikaxeCS6>Q;ewv5V(Up@_K4`98$oP!haUyy*%H0 z)!5sdLKWSCes%z!?E8K@^I6WLK%ISGTEK>kEUH2E<14n(J15oqGmiJ1wZmcBc{<$h z8BGI<14Cj*l{TfNrwi5YE|-nSp>MtK!1zcz02AO-%RcUhQrf-cjeF(4Mj6Lz{2SlP zu(PA$RBO=1l!6{nUNchyr}2b6ir6Y0%$BKze*UFu^C51NDrEoMrDLA9;uK1bMclQS zEp;Ccf+~c46M5&bGsJJWarvS?)c5?5}_5CM^T{Z6X%aX%Gvg=@|_EL&r&Z-PG;YB_3!1Y@ot=O4A(!vamlRx zK6<`SNC79tdd3ME{Qsu?tiL=I-#*oUPZON(=p16PL*uN77dz6zuJG}s{wF#h_+*M0 zMhqSJlE{!|w4tO-bgB*B-)JjPLrSAmG3Nu+MoQ7@n0M79v=|=}C~xbBP4_)X-V~#X zH_4e{mspY$3!XxLjIHaO>zr0h* zG7X5f>jiC=w4~5kG4D)- z%olHQS$b_1+@*uC&XZ;o2*2^UACLc+Q7N$Fhgx8#Q#A zxBofrbQD-&R+X|M%mdO#81aAvJIx9oc^IDQhbT_Vwrgja$&ynJJt3*mvR_Rtg8p>0 z7TfsrBu4A&gBPh7iXb3^vlT*{lmZ_;9Y8A8eC+9zFO^F~0XLYy;u|zc?ahNfQ^!-b zhFfa)O>NTBeL@j%Sk0vg5t$vnJNHp3;Y>T^S<(1DvCTk&BBk5cswJ;r$Dy;7gRRGS z!;R&JfW&Yuj!s&PmdcJT@ZJgeKVuO)RoK2AzCWdlKma3A*(cX@*tTxxh{H0S{dy0|7wL zjV2OE-}h%R9wA1hTSP+)^-zG44>^mvxwh%O@7Lm9W0vf@5%icPf))(Ha#x5+;J8y{ z;<#4Tm*dR;ws^{?8VQoeU$sGdx$ZIOKehSFhUrlhvx->c?B)MMMz_yXs1;k@f4{D_ ze4MP3*moy{wou0=qzDuHS!aSPQ+1w0>Q#} zv;stx-JpFh<1PoHD1`oxexp}%)YUzn)&)@wRf!GBc*ckyFt>FI18#o46y2f==$q{O z+~5kfJt<$GLld@A6s}YgxY}ZsOcBUDrI-i?$$+QW_?kpX4)zKhIT&+KgaX&@gP)^NwH?+erq_XZ6tc`Z;7bCwXziZ`y? 
zEUT+`{#~DBd~eAfQdl=VI^>>oATUE8V~-HA~0_kq`cA0afnINbr{_a>KV?jn~4 z`=MIK&5u%y3^m&&Vk)R5o*3AUqBrDMgjoF@+Abf0eF!Q48OWOv-&>@fNBj>tK*qnH zi)JU|E1WM8I9aa2E64VoV>;cQ8d_09M7>|I2#FtZNmfETQbmN*CU*OMJFf`r)+&Ef zz`kdQ#h-MfG)xD;f2h#Y=*Bw8>Rn+4r?FU%nXzTJz4ogmtlT*6HJ0Gqf*g_Z!MjR~ zr=eRxHju1<(a~+Z+hFJ!yLSgj!=6|*q_eY49gqBB+_){psG=imF8G0_UCncpl~|vb ziF>tu(hGQZbV11pH%0{)j+zg<%0l3QFU>8 zzrfvoq2)thKh}NY;EL4%2n{x?mgeQSH?G~V8J7ojbB7n{adL%6f*>n25!%>^I2*`7 z8*MqljrG+@aV*ZK7DMzuRW$?_KZOP0c!wcUxL6G}FYE&jIdPaSw;`H88>e6M?_64_ zxRK-YE`klu?dOobP;1lnZ@7TYzr((ZD1=Bq;jecXD&{3;h1x+Wdb1=I`V>8NC1KWT z(_e`}(IQS3Wm^Uyecp>zkv8kt%w+D}Q`fBeV;pIv`3rVwBd7dBh1DMZ~Hc5+c z&O+*KtMQHVXJXzc?9f~+z2;wu{)zAoTrohz?G`*KQiVl85hbbD5Nt1gI3_dw*p%chJQvP<#ggSDZa?CDdMrva;=(NVJ_&P) zWXKS8IFPMk4heG;CbKE60TvecAjx0Gw)tvJWYCu$WFZ_12wdz`Sp@-sccM2heyFzF z^+Ey_0dr8MQdbhni#P7@FeA)+r)riVIg!H<4PqBE?yuYZtHl#*`WS(g1$2+i`PTClTzk9PoQon48i-a}<=n0gow6{Vqe z(sOjM(8Gr9Q+NPo_jT$AwFo=Z9D;;GWViAW{W}NvnxfyK7}EbN9ETwtfy}ythAX-d zJO276`vk(lO>4PiUaae?%-}*P6Ptf`F@?5&2fZbfN2dZ4kQk*!>H_^of^P6l-QoV5 z77g@^=F=O%rPs`A*EF;+z~QR39RH@0Y${y@UvY6e?yyA+VC)pxqt~LEzjrmW!2cd< zE9rxr^-N0t&gxl8BE}i4OJHpoH{G`@R|S{|pt-U!=dQ6UG%-zUbcuICL$Dh`B}@CL z{7m!QQBEJXip1)gvRdvC0-pmO+}Uk8wkKMe>b5GkJTm0`y{(n0JSy$tFX zB1{^=`WCP}*g@!evgC(2am3|k4F@@hdhsRBX&ILJp%rH`29&*Qng<&gUTzoKii&1L zF5I|ltyw&Oovk!8zHnptd?Gdo|K=nvLLF0IVo03%t1;5ZPy(%4j@3}G;H2nh(^rYc zVZK!pRm)^2t+69yQ%+Zgx6Y37;a$i3Fuf7!p|dGCT$MYMYQ#BlJh7X^Tq$w600xH0 zl){Gxd&gWAa8p9F%Zv%~Gr0340%vCYbuyaq9@imh>1l@U&XC>*k(z6wSFi#fW@oSp zIZ8aOZD>^x;He;21x!HhC3P3@R9ybQ1wR-U6@-^a0Twl-h(X7lx-@4}(_cLGv%zCo z0bc1C1SOTFA9hp|f*V;SvxcP^4DDhq1&ElDhou)w#W`AzBhrPU!RU*ylo|nY6%;Oj zEh!|+ah^pQR|NQro zMQQj(w0G4R6Q>6`FBDp{sGO|t(9;x)(9l{4ur9_ZTW=p_q5&PBK(`q@OQ&_^Yab?>g_L2X3Ky~r>916ob$CKheqSKe z!;HNeGaqfGOF^=t3YZM@&hK_N8Vbs))P5lx2m)N2O#C5Pb-S1DxvUv@URWSBsi3=$ z9CNo?Ct54ix#`dnA_N3q(SSPvS$T*L8W=(tOsm^l^iAJS9n7(c>8Y0}m|}jK)Yf#2 zN7wROMRs@enW7daLcdJYUzNV zGrDa2k5USiXy+wie^pKI*Ej#KmvSlbm`F?ox&h{n6)nt1pN>moFW;hi7p_=y!utf& 
ztbJenwBDCdr>mpgmoSJs)Ct&M#-6NxJPA!#MJ~h}q}m1X5Vq-{Ds4~4rSA++>L)PZ zYuZX&Y^!nh-3BBQ!w$Ju-Y?_qhi-Y|Xtu?JDUTWAD@^cMC&HL}qZ zH8}Ry(7wP{RZ$vs$!l!wnO%?YtqnBC>P!ax?!jN151Y<^CxziSqxva<^WT9U!|IdT z{gB@H{~1uQ(QIxQvCx4ly_PtZvkbN#Q#!M)|4<$y%SaR`@?E@4~kGNw6c#TEp8MNsWbZ%Hl z){5yN6w9g*%gsTm#zw<(c7JwkVRJh}Q6NOpy0Fq>Zaa3rz!}IkCGO3?vLL*c>Af*Q?^B);GiWZB% zeUU_tuVZe^nICLz<%(%L6`M2EI$3Q>yc(;!Y6<&D0u>1aNW)0bCx5^p5>z;^ty*y9 z$AZ!K+m}7;PBvCoZLEjZ34~(5O=|LtpKE54-GZ*bnM4`Jl}1oiftma)laMirn;n(f zENX=8$im$vK zzls?Uwn|>~p(WTTEH0EhAYhPX6IS+vnQh%?2Xtx1qv3&F01K@PNP-Ifrq~Icf&Iu& zTl|;L{YGh*I&CgpWJ;BF5Pr3jXn=EX#HKdPNn>Md>}i*i-NRHGv)ajuPR4hq)H(c} zwRkDxul-`xakK_qY=?y>QQ)q1Zv~&omj3RXKVqsa;z4JO7pXA1eGmDn_I9fU*6!FKiTk9cc>h#~n9EV!#0Q(W4X6Lc>AV1fEZ3jjh zH~3+39^CBT433efp`t#+5`iTsjU&yBPurcMx(DV7+1(m5EQ%XSr-{WZx%+?JnB*)~ ztcm@=-!LUMG|CkWX9@v;CQ(W4S!B*&glhn{1~BS`*(-4iaSO$UrrplWE$H$?cvC1) z#SV-K#LF0Tg#JMnYYU^Kr3?7JbxW$g-IU1b{`$sqBa zfmMFW9iNpiWFedj@sHeg>_Xl9KR+JGUUicxrnGFH*BcOooo8==IPmZkSI6^7VS@q* z$O8N=49ajqlMX|gcSluBPi;#Qr8(PBl0tN1l1;yw9gOx!V^db0X2yncjqaqvv3R(enLdjD;BK~?-|NGY0xnGAPnb4W zp!a~$^Y~?eK)5maS{#Acupp12)OPA(8fQI6dI+Kh_#)Rsb3fdnC>juZixLmp(u&kL zM0eNCJ-1G)PMV%M{tgAtB(CZ|3^mF4&foj59f3GRx!mt+7sG0%0cr=mI5OB$s@}8# zHf+#1#9#!WhHbD6!N_lrfYfl>h)lvtflUCS7PG}ig)d9Fe-8DZa;U#$9J_+-E@LbO z+=`V9S5+j-4xnmCG*poQ2$2<6kBJTMzD`DcP9=Gagz|fUNWDm`W&NLOhW9c8Glq4O z!LxQK43kB-V8-u~+mr>;O{+Gcd_T3Q0D&dKU2@flg=MDaF6YPLON=BY@yMJ6 z?0W!Jv0Mwde@xQGsmHsL9ZHg5RmTDe!&M)`Pz`5VdHaqCTpt(3-o`Lx)_8|*2b!fs zn1DRYMgUT0kz(5LMcl(*zWoX7M2BtqF?gD*fJX>ha1yv^nVxkkkXkQyAA>o}RN@y& zIIYf^h#|E_eivFSG?#`*Ljd&dfROsET&mTpg-K)Jv~sp~bsX%NsHZ=|M_F~$;g>B zr8?v3VCH4H)Q`8-v#lc15Axf6E*iFEQh8s0hk57j;XqXJ5W8Q>kn;p*Z%=S$=D8#+ z2^#C7$`zQ3UJ?;&kP@q0qTWhapb_SU4jqut%7x2;%s5ar5KV@cp=-i!U1I)P*%sjr z(F|_Livz9o4S$9wCRIXcj|! 
zvr?=lxXfiY=t<4SmXH#O;Is_zS|&NCsUQd-^p7tdA}(_}(q73-ze*au61l$HJvR~Z z{FHp*cExvn=_OMD8==YLzDUh~moPDM!&M(pRH&Hi37&9MTr;fPjDl0!7JgngXbo-9 zDS;X{(B+L`;}gGM%}OmX#)uosiDxQts3=@AP1Vf?1}bDfAqov+{i_7g{K$!~zu4 z^3ZVt_Q1KsHa8|W*bo4oMi%Ewp}25orkZrq%bN(SS<7qzIxJH2VQT;+I!|83o`0Qi zztF4!i{;6#XY;0AwUII`@rVoiu*x1ulg-BE^)p`#IQ}9WcC3A3LLt{lq#}l7p}8mk zza8N2P#+TNSqHEh>PMv-eHu%*h*~+O%21wjz=BTe5Gdc~LWt?|B5~_z{nHu#3w1|> zic6$#4{b3RN6hPx%$!)`W5&4NSgCVG05yJ=-3`AHDrw8!P9UU~C%E8+2zqns*`XeH z_hKAuCC~Akx9L2_sHE&*@ZL)xrEB0!K?*={zA7@U^$ z(1B5i)8x98z?*VCa9`0U;eZySs6%s80;=L3cA0X|4wmybR$La6QP2bQP66rXX-XKm zaM`oHpJ52}!op+=qN10^^p)%Y@Y7wr2pQ1*?8~=}@)4 zwY4mQ7B_={R$vYN&Ank;wdDf}bt*S9<-Pn92VNJk511pkulK&M+eIm7&HQy=7Fk`@ zSNAozg+!osKjE;7pCM}stwUX4(W_g%O;~wH#QlUKHXsKa?r2W;j)Bjeq!_~|g9X+6 zxi~&O4mr8p)=gD0nSubAOu~e9A$Cr6FY`-#DPfxu^tp;OoNfCFTTi6+PfW}qd}3FmzT7) zWt*=@0v&Qx>n%{yM>`~x!U(VjJ%!nb@o2CP0Ljt05PyGAS2wZ$z^HWMMzru`(4&80q-rxbEWNC2Uo@shsR-RFY1M&FXaQ3R*l? z9UVk&3`esi&COLZoVR0>9EYV>!0bX$&3ci0m#o z71LQ{gMR5SJlKFZLKg!VTdhH~P(XxQL5(k&gukrRXUXQCfG`W>dI~{!u4dP`1cZ+L z2#&l$2v?CFocK~Oi@tS_JYMy*bcE{{C}(WOu9IBSy3j)ab`F4Pg$I1`%?C)X^%1@n zu0^7F4&=H{7Q~KMcz5=s@vUV@Te{Z!9`yhnvs|r!NV;%-@Q`-y(Iaw(9h<6Ome!-; zPT%%1YiN%@kb^ehNMJY7q&3o~gRf7aRZEKBL|&{)Itm6IEq+!LUGAJy=yo$)TEv`X z4XX$fgCsrJ!GZpnB*Ng0_=LfK$Li3Ws`dp^feb<}+vyN7;%otqS`_W`>>QZha_;F? zQMJxI%|RJLY%Z=S4gc=j67+fbZ)o>cxQ@37J$mOY$PF`n`=XufO}D*cChSAkj<4n~ zl|K?M9Nx4~n?lh{Mxa*j93ltpLdQQq7a*&fY%}0Mejb2Q`3HG`ra1j-3xw4^FVn*W z(~6^s-lXM&!r~P9q>G5^f@&~ELTSjTiMYR+di~92?{AQ5uW7D=HjZl^(j^r9)EUu) zxgst|aGfwBTG@fE@H?k!vX}*!XAR1mXxfhm9@4lvc+I(t=O?YQ*rM641%#RrzD=?D zP0?wD2}3fmsT0hO?AD}rkPH-OaMO9dx=t=^R&H^z1%She8`f2L%8FCC^(k}Uo1S@f zGw(KAB2(@isW0?B*3l`*Ba! 
zep!lWF5_C=G-DJi0iDmP%XtVvCiq*zFxPpEiFcF--!m{4H=-iX98Enz^4 zbX&=IP*Jk2{s3wD3^g~=Et>1f10Y{_tjb%EliF#tQ%^cT`kJLOC6 zuj?^IAnv?LgsC$srrY|c9)gSNcOUw}Ao{mOttNeh;qM~$Yo4B%=FTd>N~R8ZOX!Lo z$q5a2SM}QU3&*8ZP1hQj_O{fb<1?mn+8NQ)2~CI_4B(N$VKPm`I?`4~2yrKnyo&Q} zc=pw7{Ow_q^`^~-ZHWOVPN*=Uoo0mOgc7JR?C~YMrpLe8gEjo?dS>`SJBv-1+J?h7 z*`g(cC-Aq-K@1Q5(P_ew)kJy2=i${Iz_L|Jh>lYWeNG931phaX%AY~JzA3S63`#j~ zg6%+q;0v9RgPhT?yaevk_`6$l^l|2=NE9TYip0qSIC7l#z6HFt->4gL(c%S80V1ac zs#-QDGFpfZK~_GoX$_JHL|w>)m>c^l`>YF6x5}T{A+u~&jNf)?DxE3D%g8TdYx&Sl8PNQSJ23^_TdJ}hT_RXB zEqo(7k;Ku$eLOE-*uP5|efXvmE^L)Ymr3oezEXUl=*ZZKdBZI8zbiN?PrZX|-PizSwzcYt|s1$h$k$_TF2f|72 z517IJT;`!s^}~ymawQiGr}TBC4k2$!X@Y(c`UVQ;!8ZVhjYA7@T1MlCk_54HQ%Pvn48+kh$hgvLZ!O^d5tI!k_TOo7(5-hAW|cwpjNoROm=$VUdaC-mT2~3 zz?a6lZjf*(xBnK2Ye?3_fd+6#n$Qf%8I#G2L5bB)Z(+S)cBhrtjwoG(Bw=^HiKa}7 zEYJb|X!8NgAuWABNdc)G`gJhEVVy%>OL%-HBm!76@`<}0a1Ab}NLUA?$KLg240rmh zlnwSK|6rllza~iU|x%q{ILu zt}W_+9vX4GD-K@n_5e`}HaCAzB!Nimr(HXPUbH~Q0WLIt^VLR8htSyH?dq#K=DU+l z{xQJ6MbbA9D)F~XhjuIe`2^E!atp~2?}P9e1jJD(;=FovW|c;4R;)cDejndus&2~5 zsT=GLS1;VdV&5nX(&^h$plgP{6~+!e*~Z_K&#|XlFyFyGqaUu?a0|sLjyCE9l6UM= zj_K7UiDd)V9(-k~EAs1M+76OQ>Qzo4&IwGVO>n6b|TdD*Az$l2>JtF2t5{$>bX&x4_(IzFIh|h$*KS; z{m@q1Bv<3G1@+3l-&X2$^>6EIJ;{LmMeLaRKpv%Y7nc;W=wb>K6j9=POYx=;BfT`F znNveByAK1_w~#mgA~D2RJJ}XeN8_CyWJYw@%#Q!rtvSk;;8Op0$>rsZdXkOIk<4KK zg8qyFwLWF@Jw;E+l*^;f)qT`1eL3wT=*|%fSMAP^04EeTI`kTAx0RH=IxtP%a=f&* znxgxOHU)K|GTDXenO^Wc5VxKB7SUPcpkD}O(17i~czoAkrY0>2o-N9qx zbGQyspPJJgV+VR;zxVI1A6QuS4eH?{!hsiFPF4><(AVQ56D@F(6(Td$TU&uhuQ}xZ zjgyTHPTy;(8!YB(w@IpO689(g5DWO97ja&TAoh7MCfDlXJYVb>L}F=ADu7~Ve*D@} zr#_o>W2Z1>dWs{u+N78`ic;4r9aq_|dgTOhT(urxW%JdqdtHqh7G%#(B?%yY#bfnO zMBkpzWCd<#Lp)2RA>~;NWLcHxnWC3%*%HRp1QmGgN zX~qT=lNY%W%n2r%5ttVu+M=)NnU7-oMP#TyW$v}5M|Y$|oC-tD)aGerr3s0Qbs#Jp z;D(x0Ce{6m>`m6Mp{m~vnTAAy)l{~SUUaYTbTbuw z+apxQI3N0LF?6L0jasPCj=79#=M7zzb-+!!5-_f<4%(77=SrKlM9(>8Wd*7qA*2i3 z7}hk1yc|9ifFr7UEV9SseZWJW+58)B3oe32xau2%+mO7#^CYLB_w1@rv-EmC%(c&Q z8P1Gx+Dgn95oT_N`T&Q~Uxve?C{6*!@w@+5m=9u#o%Z0@r#O25OaiXu3{(}HTM)Jh 
zQnX-{lBJR+_Hw~VcbMl<<6+aFMyMSqN@LqB4xStgh=H_5Q48c^9f`c{umrET#f&CY zEKRM20X9vOo{Yk4u&pnl7Px+&`5(t3QU4(KUK2m1CXz68GO!pX29euW+Ywx-kl{YX zOPmU~{P(tje3dHo1s>jS>&PA*oF=nvlYzgdB93stHLmXZ{3D%0Yn379Bn#u(X#N^@r*Dui4(kIGgWTl58@I(t zobh_I-7!Nw$?gI>BRgHKA9z1S3hSP|uo48T(VHiKf`ejEf}nJ^1(sABR(q1KGzJ`#Cs=tWDz-6a2sjxy42!jyb12iy4KI0cxnR4fk+?tlRLq4U@L zbp}9RuvB5y^DWzBJj(Ql2F*7doRS215BjDQ*y9Qus<5#5>ZAH+W8hkMlBxcV2!#dj zyu}2jNHP*>zPLSi6@B{uF{@KkyxR13=;h>57xF$;%j7I2RCP|vTpC~hMw1SO0tC&7 zw%p?YkMs_Ewc0L^fQxd>5TBoX4C=5cj1i3me8jR%-=?0l4Te)5M<`t#|DN-G#?XH<>3#TwI}?sV~G3P zy}i?r$2IM9sb6;#9NfnVH;3)(Rfgt&IG%S8 zQ`TTwbC2*&T-P;tRwW#Np@D04dcJD7>I+hJBG1s2ez1xBFhhs(?ZrUo~w_o2MzwEPc#U;0YFwA>dWy=)W@pYn9ge zH8TgU6@a>FoA!6Amn&mpH z3d1R*PV#R%Dn`rSW9&?&z^DHPrV%BDovQAleid;*nzX5|PIWOE7?!Jy%mjS|4S*y@ zr$)ZPR2sw!guwW=| z#IZY}0sS5cFL-9m4OT$P6A0UCkD`?y%n5Fhlx7IXD@eV%P`HA3i=|lM;E6$$MMHq- zVFyoqCgER3^%P)i1gqB)SFG=uF1OntNo!eZ)qH4!=2e6}XEmF_)hUukdBz4ZhNN9d z;-pc_o+tom4agiPh2-ozoX87lzh2w$h7u_dwgY5_NkE((;!5E*3NNAxU}@R>J&~$; zGr)e-V>h)^NKG&lVapHN35UVP4AM=sXi|CCL~8#BKVbmaWcS-3y|8yx*1#bQoimUy z4rT1iZ;jB6Z;P<5-&HN$i0yvrWk)n_riwp(6sfbgChCXtVs%`BNQn%XV$%#GI7(!!2>CZ!nT-2_90bfkUPy9i*txI@&`SxN%&`pz zGzaD&O=Xftv9)O`z1ZzCv|2+Yd|*DG6UQNdtqfH>I=+ORuaTjUOt}%2OBP>|Cr(5}GJ*Y)+X-Fkv3#g?{!n|fW$%!%Cl?Sb{1+ALQ zaB=Ih&n%4_cdi+);MaNFlc`dBM`?U z+fTbSKfyhIN}y7Ux(%MzE#m`#>Z(Hty;V6(*+J6l` zg?n8)TZWv!AUc3b)h1`*mT?rZ zS%M+P0@Gu*lT}|LGn-r9zemP27t$X3-!cG)bpY8AYnM0Mq*HDtfxQc$sXCEgc~~rT zw{_g<)Cj!XCbyc>x`(gk7<;0$bqS9;izfXg~+0ucNDcH7(B}iX03@6})S2jq7$@rQ3`1 zI2)b!@HkpPe8YWn8BpV9=Ef+ys493(WeDyyJa)FvYMeyBl_+0~z zGPGEU+U+N~KDH6=wIw(xSm$zVn^-a#FN8KVCdqgfU=wQ;9VUXJ&ja2sxgzve5fIaY zunIfa(2Bx~XA=&X#w*C)Y6FfUV(9{-E|`sWlclF|mfnLKgyuC6I85bim+;Y+p#zb3xi^mD4J(ddxKCb?;s% ziy!F56J}187xfPhcdZ>KEND1tGQ5LCPrB!CIS>w*3$nrle&1SnKm=+UiB7CT|1}Lh zVDkK*&q4X?n7B};Mx8es2gH^Yx+{l#t6ovfP0609sN%e2265y?3yqn-bfB?$H#V8M zB_1!la>6~JW`>zJYLwt>ui|8GsP^4KG7iGEdoIx)W(M59#nHt(xq)RpluovmP*(3B zUe0y1n|I~%zd9}yA8J4g1=Gwo!4xShtTGC5B9iKh@tX;o$^mb74h(MALq#B7X%tsr 
zV_d2yLJ{ZVUdUwvko;!eD9(&n4D{ge7LgCTFn7wu#`j}wku~Kum1 z>q-s$U371QCpR^1PnX!O!pAukOUOq9J0fj9WE&yb;75sCd_kEmLbXiHja$k}Osr{&|&=e(Ie$vmL8sX^YBUS(2JFxo_U$@EB}nPzREBk>%47(ltLv%3%i<@@}Ib z?}gjR66kI$F%AL=iFU&H8?0|{xUSFAmOE0h^(FUXEqmB-^2RI+3YP?L7>jhnGSE)- zrc42pQTR`csJv#9`Je7-G_NMXHf1&IwJHez1&j<7m}WkW!?BzC9CIP%5dJ=0ey1Ft zagIm{N|3ZMEtJH1c||3c%Oy#}GAxgxV$T3ey78LR(2K@mO{Dw5Z8^tkBSlSzjChoc z#W-jY!E{MpM@fy`^VnGGQn0jcnP9w{j_cBE7L2^`8Ib#Bt*K}K7Zaq9{Z*d5Ly!bR z3j5Cqx1Aoi?g@V@nyPf1h8ScB0LT!n>U`4()vrWnnh_g^*8f?v5BX^O(Tz^u;q6<+K;}W zUXPr@Fz_*xQXpNn%7mG}(iIq4n`4OM+$H$!<{|>uJ)%K(GJgo^)Ej3?!1Kf$|ry*bD z&DcK&O8JJ`N1zT?cLxDGRoD%ikajb%g&o?-DSY6w*#TVd$_XwmYu&@r3J39-03cbVw!;oJM z?CA$P7)Q&6i1Y`Wxgn3OU8{H&+f+*$Bl2X!UEKLf3MnuWH{MrLzKoEBE0DT;nGHU9 z4cdlr5REG-0ilW4W8$gq>(rA`*TjpeN$HmVcvl76+QVOgk`F#A)vx}mhsst9-cao(UDyV>Q043CCDMR5*D+2JUiQK>86uH*`0|Z()?yR z2p&&=`jx7rx1OZcao^IkN__rghMSO@&RVzae zM(MlgcntSK)R_}4P< z>c%NImFajwh~O1f8aAc|$9nyUW;gqvM`ch(!n#lwK&8Y)E0vj)wX3n7B=QY?LQ!y= zkQYZgqiuO}^>QH_h5M0RF zmEONx?wH1qAiVZAR0;r&6z<2o3(g;Yd@VaM{?l?!m3j-rc`8kXN?=S#ucAwlCzQ&B zTA~trNuh%Oqg8egWUvf!-x46YHk>Pw<|=q6pzHG^IXo~D2VI=|oN{`ShdS{u;U z6=dwvyscQeQsURL>6u&PGyu61;ZtxqSbxF|0Wq{jW!znQrbk)Gwp59+M=MeCamTd_ z=LTRV{qE()BBeV@0u+5wvOeb(q|lI!|JUN`cMA6^&{-6Bn0W1(Gv?%GQ?)`fVZ^!9 zE81!sR$%buYWx5W+CG(KQC$46hizt6=1_~)(WLcUr74t{)es4S{R?)PW7q)c=YtjXrS5hrBIOUc(91GEO%J=Yp7cR zVj&Pt7dJxSa#S!9Tm~|l#>50cleCaPnYD-?#f>LCMkS1=gl?cPpwM^i%n!bPZ|5IYaQLSgT!3~zI}HU zzVj4JoB_to4=`lQiqlC{v{;(N7Rhlth6D~4NO0TRQNTc-ufPYva%^Mx12*p7m?#^! 
z`i#q|c*>s#fTt1$HsVTF;2+e(Q*f%i@-cpBf2j{z^cv7g-ed(r8Drrm5=C-9WgO$u z)RWVQmyzZUP*cIaT^B|*;&sVB7T9$B8uYs$%l7a$#0<>=_{3owSCMIm!=wkP^i@3I zv#y6@@1tkDF+Ddhkd_A6mQsJZoa2#(b0hrlSPUc|FpkdbXyjAZTv*#`UH!kad80^y zMG|Q@b7ig{4CP(BB}U6yr-UdYM2fFG^Tk7a1!Ojqcv6&ud4Tg6LI`^aq9mS0JxtMo z=+MEM9jhC;zgG-~2319T>PVK^F*RQ53E(+-;-`a}k`!X55orW$7|O)D*v8g|p$9JG zUeITYQg^_{0-aPjyVA0Ij1yT8vZqY8l5ll1!Ns)Cgi>u+SoF9{dvBK&?A8oMCZ~?I zwAQ)cGV<;Q>a(kDl_cQWzP_&hC!^BkMRESU_Q4kY5u2c2tUy4QsHnt}W{5EG>FNf_;z zpcX4Z#O-ZJaWjO7eiVu@?+U2B@!n%ovsr-GV7rDaV!Wx^&B?PVLoy@^X3+Img%ljE z>`+4!ZUX-r$Z)%BINC7`;p1;LxbjnG-XW&(L7!pQ51Kw(o87;(8}$ zi0>v|QS~1Hom=o-X>#@eY~mcEuCV>{?3}*0AH9Sl4f(6WT3EUO2fVk6K!Lc8r;DHB z)>;JIkq|WUj6zMg5}Q_83u2XG(vFg~0AN~{A^@B+xK#u&LM5cVhmtLAhM|4>a=5C? z{5ocfP7DT26#cL$d)oj?h*nsTXClL2gW06RBH?zY*D8}rXz1!? z+N>4?Jjwvvte>|-;rVG17|x2=y{MTm9vUWhzeBKffOG@<@Nn6%5d;@r9Ul(IVVx1*ov<`MAN@IvuEE~35sO}3Ro zi6Ru}$Yg|6l#3&7-G~o(SJG;=(i+JxoPC{fg@6@vgJ_lhOCA^8y)T^sMnCmVy z21f3X4q7pAk>6S_o{@2gDdk0EH4`ek5w1o(=U~A*-Ya`RFGE+%jT}8Z7@i=iT~F*< zntrBQ!Q0lqHHb#x{YF<2e?BL`TjEW3Q}%=xG*)Nh>j?LRwO@CM=e_b<^4=ru{@Y8@ z(_4i>uPmir2K1&@zR`(%C?WAB$VRQLW<4AsCSegRkK!X5Dz>r0`i*Ci#$DDKep6ov zQuIF+Q7kHL9Y-l?^%|^i4NOG+>Wkb^Zu~o?yIC3{C??cdYjq{Hnc)t^N_hvNsHiRE zb?ATvuu1VwcMn-_X*Hbm{`w4$RHpvs^gO*Ug5RY%KB8kUKJU@RV2xDBt%UzOp|G%8 zS=M2GTbocg(-o}C1yCSf`dQ5F;mw??EqYbvV|P!NlE9N8ZbjMQ>F@BdiGH=lm0qZt ze@t5bqp-!`JWeEvtMoX>u@z8bMUY4I1DU#T3cik=){;2diniYvSPcbx|KaD@#B#lL zCh;wlU|A@+)LgFa@63cJ5c2OC!}jO~yB$E%C)Y-`nL{E4JvuyHILp*n zK4?w`xA;orXYb5Sqku)l2ux!f3U(p)ST#<{O9NOyw;d|1o(;%@oSN#w#u^o*ITHms zro>2EafrCL5Cr;kC=hADXlw@bzwr_J?gGYIa0y9yWWjS`eM9eSe8@1b+s3Ooig#0i z_=thO`)AOjISEFcb_r(Q1+*aHwF%|QM^%bVUF~m|Jiyx6Cc~F#`Xm|xik+E-BbcC1 zyEaZ-Y)vZtu;-~GY)qOFAj&L=%!ndsAZ56dVwOB3_y{ z+zJC#7_=&Bfd{86T7^C>;|O)fSz!vcght#N79mPuQeZN!bWOs#_50lZP%B*hl6(5p z>?EoKcM4GE?UhdbluDIP#rH8_2Bhj|s8O&HIIBKweH4V7P4GW6A#$NNvq$hoeHIvR zber+g9AX=e<69=3LB|p5a$emGLdMYH%z`6>je==R^0yCCXR9eO+i)H|m{LsXkjSiq z)apH2y4@~6(jqG)WZ- zjsqONXoU%7Vj8pKdEtM}DJ@D#>yXINvb7Ob)t+g5YpEf^zN0aURHTqaHMFgD>6of6 
zG*{hf2~8lrj!(MIMJcs|ntWo8TMK_3UAEJaEEe+c78Egx{BLCH+UJ}-xV?7WYScYk zq!B#@Bh>z*DXQYVrUnYZ{&D3IPfSm|rJ>Qs$msLfM{2uD9e7sVk0gYDXh2?*MuKZ5 zK(+#8(nxU41jxR56qntD)tJnQZM7iB=?!&ThMZP2I$SfWy_%g8`c)+5wu1~URh$~r zWWAavaEK4&AX7 zCt0x@@2VN^ni6fO?f=xq3S|s*Mvy@+`fay|KASLe*c}ehJ0L0lr}pFjlkD*Ue*t0A zCR0J3l!afv<$UbmycNwo*dpOBHp4f~0qdaNj*fU1N38HU?eF(8_Ys2pecU~LZd(Fd z$u*|0>Hwqx$fpE3hTMU$f)xRtAXDcBv2Q_V-o13xu1`Y`yd5UkQnKnF1(!apY2-*a zi?&VY4+0NiNkR`E=`6Qc^B@cEQk!=TfUZoKkQDJv>6wj@uoD z+LwCFR9AFH=n{wLD@I9KocXXYg52fK4Y-)*clxSW_&CsrPN8_qpsLsd32~6TItmk^ zo|;^e+#bfC-M8HTpvn0_?=LOm)zHvTEsK8Tf?jrms-aYMT;qcN3rW-aehg23VvXs$ zTZ+BXqRJCwQ&lX}%~z-#$Ti=ohH4f%&yQxWT~jLA&--@}T=Elw4vbivIbr)pZ(Kn5 za}x(|Ih-rPi%$T=Dztwikxn{Fu|OOBZs){0a6Xm)&+O`>JjVxZ6b*&u0LcCroSNg% z7&mwufxdVWlfxK_p;vI8v>9}s6TXTBa50bey1^@u0H_lbg zwE>7&!(^=hW(OM>3eQ1BAS(Kt74#uMv=OE+t%gkovIw|haD!62VLZ~Cvkou=yWT%o z){U^-5HJa7Lw_f*g6^CKCI@t<7XvK9wR?hmGx$4a(J~)OlNfmG*eEdt4nH5tm9{4L z&4feGIq}Q}6kTOpQ*Rp{-QC^Y9fEXsi0BYTiU^EQL=Z=JcQZN!q-3D7(V-yS;AjVm zhz`*4-}dhPaGnq6`JL@P_j6x&JaHYi55$v3O>xbm^y+jr2P!Vilh5*HC_yBEVGtm8 zWEc=j6zd59(0UUtT7V*elUxy?NKWVh021Pe03?7h0DyCnaB%`a07Y6*ybt{ey(`t5 zIqf1G*e=j9K4QEnA$inT>OixCa*L^g{tHtD%lM*t?%#^T ze?%ZUdrn23Yg^2b73^X44*w@&^&|&((n_8YL&}5P$Fv@1#(K#ePI|V_&699{DoWK9 z*3#oKWoIuL!Nn^e=BkE~Hb;)${yzUj72fTg>}cuTy~04fJ9XIXwE@i3Z~0PJ z8K`#V+(pl*_&%Z^ChiFh@K4J2e{L?!B(`@DD|jvTUyMSP!;eky$P=@9v$OVg%&)8V zDe@>utZ7%{mpLD!y3gh>X}zOgDh2LKf4THc_UyOgsIjUgUGCYW!E)XWuKZv@I6`_s zeDNz)QFSBq*&|>p84X#I&XA=HH8Hd)KGTLpeX^!ji#JHv;yJ)K^mFLfr}p1{LBErq zaYyS9r(5wTC-KXRQvQpG+qJKGbNIckB~m5etJjc%RYC-8s})6H@<}vHbD!8z!O9&U z8J%(Zw>kyP6@@MEaiqFV#Y{BoD)klCH>oB!6H#T_0Q`6U(lQqO9Rf~6k=9?;vq}I{ zVb(g)T_s<$A1m>vhw8zq^iS0rTZIc#QU`JtxSTu!-_2trOjKRo?aKAL`t)k~-n$)r zPhyzquLJoW{}SHX&4};G&$__zYBDJGsH!~Pj1Py!kxck0zZc+FF<1UTz2rl6%)Tp| zgfeI>>9lOlDyU6z6*xwR@6rd4GNqyIw_i)fu9AUTmTxYPj)EhHO^x?{ZJ#oMbAJYM zO{dl$PK9F!OgOw37^+u~C7-M)PHD71_!yZ$IgtwbP0KQ!$FGqhlKgl z-T&70Jxvsc+vM`Yg2(^Lv|Km6rU4af9}&Hv<9A0B5zX@8s<* 
zwv)P`whu3S#~i;2E1$jmTFHA~BD_^2KYjVdlnyBk3E!^&hhoMz$q&rFihnb|ZFpnx zxo`9J!;ZAIoFo?Dy56Zyv`M#AVA6l`maC}XB=wHu0dW`D)m`Ud+kCA!_|Kv?x?rs=`{A|lmO z(`AWRpv{c`AV7ClP^OB6^G1^WEo7z;G#4^yOKF_)IM%w|RFEY_>n4DnX`j0`ULMp@ z+vH_j!1giUQ%$ien8bVi(9#jCp-iM5dDyV;zy7{f`R#Y+r;WQ4o&QB{N(-yxx`bbh zIsPDu4*pcsF+bHevJ`!^Oa1%9WgV)l6zG&ABp9d4i3k0W5a3eXZi#dkwgK#QLp zYMU|p2PKxqkNnmE|C{j=bIs&Z0qh&SkR~(mWPS-CxVXG2_v8lu*3(3xh^66qDTqS z@E5K)*5V-kf-R86c6}^Oe%uw_C#we6NI8V@upDc~OZS9a$#M3pU^PMw<8TR#LUA}P zkXI8}#6o&mk5OJZhQ|qHjh7x2Ip3b8+`_X)RmSk7q?h34IDZh{ElUs-r#OYKAu%b% zvDG(N;kBW|1vnm)0a?qfzg}@S=Yo$4K_+uNV$K!@!qc2P0QrxQ=jxGo&J92_3D;UX z)R-po=B@ei?S;b;_JhxjDUr!9lpzI!t1sV$2ZI`;Pi4mFGvuroN)dIacLKcm3pTFlNTiALDZv$FMMSt4?IG@JlFB>Z=~rH|Q)6P1{Gv`ra?xoZ;wg4iRk>UOEwag1vt^ zcDqKjb*m?wdvj6q70)84(Fqz(%`c&A{|L4a^ZW`kIQk0X8@s*EQMZo72l`*)DtKSw z-@kg%^%Y^s{@^fHYy9S|DXSJSFHkTnfsT^>D}psZ90ka^u_Gh9@Zci5K$cA1N@yct z_N61Hv?;N5B_c)4^^FX0_RS!_I%1vm31!p6TsQh%R~ zG%^uLgo(f0a_mXj@LcG%rr+2ZFOcn1F;(U=|AA+Tq|v*j3UOTGVwB-qA0h8Pe%0ee zV`a!wB;MXtMFBEu{^4E(e8lsLDj873(^!54>qV00uidIkNCD=Wrqgd2CWTu$1-mwS z$PtjVC=?)1((fZ!D9($N@57CPV*wqMe&9EJVn6)`Ntz`Fzj@2W^%X*OgO#Zr&r=m# zsGR^1X;%4*+z@QAKswfl^>OOW`Gc`rjlK(@N!Y?aXcGHZh%Kq>#_R%d)Sro4f$*_g z4WL(Ogs_)?H2-BHpx-om+aJ&N6{bn^6+v%)0c6sN#6MZSEw=s#kNF*a14;UP3!`Sa zC+u&SO96^R7Ka%A!jUPC-EtWJCbG)k@=m&Wo0NP5v1K;XGT;D35*d*xRd$Wv;zhEV zqdtPY^`r2ee8c`@={G@=lege=W9FxaS*+=ajQZs_$O)1QkF_Ig1tT0hvNr^&+Pe{; zV&i}P713OA6LdCwd!4dPegSOY|B5g?x-~19y1gcn_zL5G^$X5N{OPv%>KOa2`xsjx zo_2Fy@*A50{DV6*kHRxa4&m3CELjV?i=T<&L|b zpwmG-B(|-%cNy7d9?Z{WY415Agw5=?;IU#?u>R7ewIu0Q1g%A5EhF@(!J9~5>}ol$ zzAq>)4U?LEhCfy^c=P&-j&Jqt>ss#zuKDjL4pNA=I)C|4@V$vISUG26wAyM~IA##H z-m2RZeQssIRM0OGVu96|&1>wSa&m0L$BFU*;|o_VWX~*s%*74P2&%j4NiJCWU$x$5 zx%uxq4iY4SOhAp!2$D8(@`Y}p0Y`*h0JA?$d_m#ai%dbI4$P3OgP57GUT!{yE7o`_ zigCr%>`Fy{xtq_}qV+X7?Ls$i3xOCMYtfi})}P5D9;6YOkFn7AT5?H{G|x1z^H%ZC zf4}NQCLW{|o9~Ddf>cq7PUxYks!Ic$!A?sZU$|kGkYN^NgK1I2PT$p41!xkKB z9SoY|<)x@+8(yn|pNVhezpwJimXaeXSn>Egde$P|>LI3B`ixhi)njt3VPDf?x6a%e 
zLuY@cp3b62w<>z|IBq!KU0L7qs-B4;9Q#n*lAfUOGjjiY%&IVPfnSehD~_mz(x1K5 zaZx`$`-~z?q}RRgEM)?E_=X^*Vv+6J*p(nwnWkr?wvKpiJ?kx-f)zP>E9>$b(MAlU zZDDiBS-TDKB5Bp`{;vh14uWDCn6%rFtM&DmdCuDUv4Z!jhNdNON9Bb@1B+eD?(ii` zTJzRmhhc9ESSxcJJt-9pXs8zzx&9{3t&=SUY*ji#;^!aB)cJcEg!fm$Ly5$wo zt2+lt7Ak5MSYDLkX%RsvI=GnJFrXD-IS@cx*x(G|qJ8EJDO1trYjW1(QvS8@AieQ* z7!lv9UEQL?J&WCBed0ow_xbPXgp#D@nk>%PiKVUIr+kWJ~&2 z@G2$G*o@E^eTyrkYt@@&=P#Ag9*~h*MUEOh&L~d4 zAhvPH9$Jryj#}`9I^hL$THU>!Xf_JVrCU9A7z=HC@-Y)^Hcv!dEwAo`CaktW%*@Q5 zZt)Ftygu2oS8r zio>nEB_A`0hW;71wrp%Wr4($mEFdvCTQ@5?GLI>SQ2fpjM%r#)?r(Vpv!b$(nm)X~kx#}O` z*oF)YyE91^$NenPxfLkPKEPL)M9e28B`DjvE#zc5*3Mj;efBNXpb^4o-ujvpiu%ZX zX=HW<4mFy3Y;}fIPRh=(Ji9+zCJ{5qpNE++bxC!>1|WE+$rYeHp@QKi)|kB zwkg$rJXM^F>DFI#z`7T=JmZidZ+Kl~D?Qg}WqC!QiV%#|KZDIM10P;_yjgpX^U&}m3`9EmvX}C7uWj=2>@m~cWlkly|k+ex>Ik^!4Rz&%X-aIG|^G2{P zLCnPTtl99)Z6+V1rg;?0`~UxurbKb-i#K9g^%q;?o1F2iRklQrIkcT;SCFZqKG|pB z-}UwHeMgNf>{|0N5MKLJNv$jIC;HZJMjLP}=KjyKZ7r`f_80Sh7sj03Cro~Z1 z@rNjIS_ouCUnHj`D+UY;qYU{|S?&3WhE{PnmYD`b3~&PnsKBa(9xFc9HlgXR*56!^qbk zuAUSTjy#_`3uME-OU?A~PBE)=SfMsVntT>AU5tnmAi3^(`U4f;zoi(PZA7~?^0UrM zz%%20hTMZ^=9U@mA9<7bvLr+oPiw-;bH0=JVmIYR;ILkT$U=SuIg2`7)bjT zCJ!B7K6P^?+7XFkfEmsvM2qh2k9hpTd_I3@2;+2||I$3xQDv6`@d;2Q8<^OBx7axX9P3FVrsC;2GcBsh2;2Z0H+yivS&8ipnI|~ZAvr)gKW!*uy$zff))9j zk*FxboNjSCP?I-!D%$G|`^uM`L}c!<4qW0}Q}xy@LuQ0*BFju>jmvKO*nWu9mv7-* zzWh7O`hQv2M^fFp1bCMi7#+wHqF)biFflWqw_ON{&+!DBR*cj|be74?MLv0cn7j`B zef(NajXq}j-86XUHGZc`!Pyr5RW{_; zRi?_BY$)&SPb<=zFi*Yq(o+C;Rxr`}sbJoZM|XUv4CWZy^-||8p7B@McsKCM5?11{ zr0`{)xw1O_tDml}d*s%yNZSGTWfqn%u)fX@vN{7Pk54k*?O!G)Mm)UZtIKK|qrSyLJlp7s-AW$LI0KU2;1^ttA}G|C;gdB5DO97R*VeC^gPP?A$Dezop| z+Hym!{&HF=mSHLRnj!jYTDAV2;54b<#2GlvfOVdK^M;b17q@wUMe0^hiJb?}7u zxC8IRA`QP$Q%ZBQ_U^|$C0!OxDQsBCtB|!v-zO8kU%tM(VKs=ghao~~>< zqg1bz0b);`@q!6&&1HrLg&D)_Ge;omk0VeC^lCJ-^k!ezhj-!PGI` zLVhb$M>XQEP!BD4$h_v?85PM3)>li;jO=O7uta`LurmyCQL-FZ>#%f!#E#h8+4?9v(Q{e~u z|1K+kOP<{PNUXzrzf5-#urotDeMlN)KjCKjxhdqJ+^oy{^Rm^p!J_j=IK>lIgDtw; 
zRH8;QeKab6tg(>ZDhWr4?F)-+%Us6~HK(KykwTz=e`;lAF&+&`F6EgQ+}ClVHJj15feEOqQGJ9gWi$}#p%z+YX)Pwnwb^8S>3n0)j?htM6rpAXXH zd2+BV%l^quwnK+PJhGj7Q&tO={TvokAoE#6tBF4+jq-(zgX3(y)bG9OC5u)QZK-#r5{<>E zhLumYmUiK#H4YAyd@0ErEus_E)na^h7E?~b-E+s@Bz3DGGZXHgSI%y{_pX;kvXAV3 zB!8%y_2*PpI?hc0C&BKVD5L$PTq|2Tx~yB_#hZTv@m}h$GX6LThfM_Q8tdw6R#z!j zj%53F)w?qOOc76DYaP6D)_D4s^2OM{*H7o0;l&Yy>Q-%M+zBxVULIlL`U~gA>P9!- z>S|eCt3geaihFGo;&Hj$+QT4Q1S6dlwdAm|v8j@Vy@qR+>I-*a#-E{MhfG(QVx6AC zf;-C(A8s+NtAEsZs_>(G&|PRTbK2j9C2BRe|6MaMTCca%u3bGG%e$H$Y+?kftNo+v zUL5vO1N8dC&X-_{6cXY1nI#s1bFTNBuifsV+CHZ#R?tZuJ_=5|@yQtZYV>9?U?9(} zBxK-FpuF9b-ZV; zMtp!g|0~jyxvl?eONX=mFnY8mzr67F6zcP+3*%H$7{&s!o}@`;;=(TY3_$CczAYGs zssb}rLtzivC06o%&~iGiF#(RzCYvq(*Tkc`hfv}FqR1}yW?34qquSU*MXi=;=K@cL zu1da-ytJE7D=jVkqD1?s_k!|#{8fm7+CwJ!7Rj0CBE+A9BQ*pCALjoZtrF)ipP21Z zjZ5~|6y{{gdvhXv#^v!=P)+?KZly}avyu%%uG2X!pv)<7r5GHo8!bW#K4T_ztl>&W&iw1p9*2;kX z!Hrxn!|tMqV#$8fZjM0j~;K2s>6uVacr*7Zuj_|-*ZyCN& z8Bt(aV4BZUQK0Ci^h~iuzim%aen;VWM_1r+k63X>cVM3H8(E;=ruJv{hy6>;FvH%W zxnlo*Qx;grfP{6aq7G-um*!?AypVyiP zlG)apm2<~_Q{$!TGGVL11A?wWX0SuFQ$~~@=Yf!LO$v$^*CG13(- z7vKDz!1BdyQ;a5n%(UwXj-%bJTtgvum0S3JMkvqzB_vd@v>yF1lY*^y9qjWfls0E! 
z(A%`^60n3%8Wl8@ur-agx5x2Jt)ZT3oe$-TNkxdG+y&L&aP0<|=>DC$Muy|pB>=0Qk0cF^@qxg*Nc~a{V2L_)yqs}V|{1Qu1M?O8vpXc zv0Zl+5q`~&)yoX9>kymZU#lzrO{ve%DtLm5-;WB{&FA{hN3x4_XI}sLV{qy|P*Gkc zlcM_)b{45jpnwgrV+g)__0KS0RO=cVsV};jaG-P*X^lKR-|&-vK*#8Uo&5L->|jOXVv}j=l^2tQPxz|4_I;+@`Lxe_3Lb8{7CH(Ow9(@8D&vyu-L@83 z?8;ULO13v`=7&oa0V`_~&u=oHs%>TG#5$lwqOm53g*LWF!Mu_nKyaVIY z31_=%*Ok;bNn0NGGu@B&GS*kwvZ@?Oefac3_v#xkVFr!<5gGC{Wo$oUsOG|#_ljzc z^E!Rkzn27CDmAM{NUsK}fp*wU|Gui~`Jypi(q5)2N=j2w(lN4;0OMZnC5DCh+`u#q z;Pb!`7~>!RzgJ;mdEyrDL*8-IUgz%zn%-?oOLH?*+c@XiwoN8vBU&C6{0vktfHf;s zm_BW(e*uPXduILYrpFF%2jkB6tNE+B&L=A0S}zThFBumxz)<;S4|=K>(dtUs{(gxhm_etauf0b37V-}){r*!**_>Tuj-1cd zvQjZ^OA~>2FdqfJZx-F@Ff|%o1)10c6kx{WR66)I?((+-z63vn?;@A$f42yXc6m2l zJ=VF*x(=_$4yWh(zk9kE^Uu-X@8Lg9Y-C`RPE*cb3i1jjrit8;FBTReX>UzvlEp^0 zeuNprHFPv7Qz|A7Bz5;AtdRy=Bf2fK%ahG_)r$JK7{B(A9fH(Fc1XsJ>K=2m_F^SW zdk5VwxO~0lQpFR8e*FS%e`YUu;_wc{!D3H4W8BT*mfooKyYh+owLZc1ggh9B*VsLm z4xdzXJ@*|;6LuF%U246f=B=hKB2c<0D@p$|ZOnVpIxo-Oezs}yTPqA#7-HPZ2^K%` zxi}qM^y@0Ia`AZCt%3SOte36PM){kGjh>DkUnF$qndrt8X)Rdp9bz7QGBDT= zE;IE}v7B*sM)J}vFOsuLteiq4G(G5VwsM|6YmK9u3z=8=^6Aj2Dk{?Y!-izF`Ub5_ zqE*(NPX?E@}S)X&Ckz@ANN;b!q0-}&_|JL zf1@5nA!@$s64vv=%XVAdEJ&TyiI#G2FlNezTcmTQe7HKA?d{ajbV;K#Z`?P>j$K! zy7h80%D1#Mzkhi!)RUI;d9S*$k$+?EDuG-AUB@<}So*+vlH|kS>o1v*)Wi@O3v7p7 z2cMl|Dn8`Dw8dbN`rnFb(8RHhQ-_zoINP0zg>7SrsTHOD6bERpJ9NdW`NjBkh<-Z! 
z?iecG@(Pf;{FpKh3bC69mxQzJ3yKI-wqLU(?B{3tDGBd59_)|Eg|aF!{b;62vC@9B z7XRvZ&gZj?V*zOXxI}d1YcEyNhjbh~znRR`%}lykyUQ!=o^KsT`hWfP-*|vxMIO5x zA!z8*KbaJ=5$F9W)!yWHlE3J$v61GcnJxuOz{JhC^_DSRT#G>NUYTTrTtamw3XQGu ztmjqaK8MIIhOyc`6P;0ITXwX5q7Zu38t9|SovnUb#)aKTom~)}Q#|zV?{fY$II`cv zE~g}Lf4ZX8*raq?>~?AS`_%I0lFF=i@@A^LJa5)CicF@*vyj&0V{8zmQ*71i54`j# zA0)j*w&-A=wFreoh>cB~l$V-%4RDF>`bWaylits#4-X>Wk194FWluXLjt#9JHmGLE z9s7IJy|9|gj9X%QQ#%Lp%8Yn3k`7XH(qVV}bmt|x2zS(CpqkoU4IUd5IgO>0AoPdu zr>4?NM{7}Ei;Edh)Hru`MCRE3q;QEfua)X6yQYZ{UAiA@f6JrJvL#whM#&w+E)*aV#2~sJwwv)AH$fhe~u!*uOz&t%`eSf2nKKjFQEy8JLCQVK?%ce>vSm255>*He0E(KI;QH0$qD-KCQ9SDb`; znXE#n9=Fmc2b3Q6;dWIa`g$35N0K&CCA63GtHX;?+3U;^x}&vcGw+^tTs7lfbv*tr zeI(t-KUi@)&nAnmM2XAih~!pE8f>7%!|f%`yV6O85FKgvRu3<)J4h{Vjm-AquHG+m zaOAZpFm_T(@hSard%U4|8L~8QcRss8?nBcvxPSFJGu2<6lRkxp`K6uW%GKEtS%uPA z!n#;><~JDE`Mq?6+m{#4<8Os|g26_l|doT%#)Q4|eG+)2gTL;rcO)vSMsQ61A}uD@}Ab>*EW*FQ=PVGk;iG&kelm0I;G6eviIxBtZv>Y}VJ z(%H|b>RUtxl~9_5d>z%Jas9h04{6s`d@``J{sSLBdkXE$GyIGwAlmiOccr{PSvlde z-s7!LK+(uu4L`|rf+~FF%*oL=v(=HhO`9xlr~1N2f&(J#t;H8)O^uJDk~XuzDP!)& z|B>DNjS4VT=a8jK-|Re;ieXjZRMgQFpd0)6$5>RQd}K15H#93EW6odMt+-S@e9@06 zA`_=H{XhxxVozgO=zg4DX1YH~4!>L=cf90fXyU~j?Uap@^0?t548(G4j1QqD5|hrC zj!1*kZN@H+3oqj5BOe7jE2hZRf!T(K=@<3#SY?dy=Ki1rRLw-MJ~}9$NnVI_TBr^p z_IH}A4pLxgUzNvVj8vyPZqF4l!h4n;jW57JxzlmAcq-Z_<`}!7Q#Q=aEVuMsj0^Q0 z=K!=EsdLK$3TQYhc3`izh!B;#$AJCdN6R9q67$rcyc1dwjZ3{8a~lN-`ItbbA9J>4D^CR>7;n5{hsf zlFCqs9I9oBwf_fw%m*VbE>`N;2`EG3+{X7gEMJ$Om5Z$ol03iT@A zJ$UF1V!o3x-p6ax=AI&DYn$=>V%6|-@BU@jQ_1J2+oJq+d;3ig{@h9oVq#w(pClUk z9Vw$ajp?UihQfpPKNrJ?_AQHXHLRlz*)F#9*za8h2}bybOV60Gy_a8WA#(2`YQYbn zP$LvRqDA}^__dW;AAdK#h4K_wRY9GDdp_N=gThNWS1sZfS?+OSZ<89m^wmu8QLYY3 zi+Y)~_n5HX^;15j;JO8kpBrUR@E1{Iy@^cK43WS496}cmoSqHBr$F+^Ruc?|QKqv{ z%&j@6lpbb2-|Q?K7b@JahtkX7Z&k&3h*}0@;$j{t#yZ_Y%L#Kdl%lqwe2zxw4#$c@ zh8U|U9w~mzyh9!a3b|YkYQT&4$;9G5bMA{TA?QMP3^8^IpZFRejp$H3swR%?FXP^) zMOXl<@X^%sdOW}NKoI(zk+=Q9q1_9gE!}$XoMA8*L2ka1OxzsNe)=NJ*vL&0vj*$o zDFhj_lw{z9>;*IE>)Hog(H#XuZZa5`loOXl2(QwMblf|y$yE8!N?M^<%3xDK9n_QN 
z!j?}bZa9mu%SZocbOGiAI5!x#sfFAZ`ALQ45ql%}K^ToRVB$!G>lXABiIQ_9z_D?( zoKJ;Lfg&XBs@+-Ww_JwQj2+}y8Q{no%*r8v4#a4Hggcl!AcL~ivv=$lhApPE7ZDU)4@f4Gi3(^<-S-$1OQX&3X@%?Aa9y9IJ7&mqgXyAr zh?+*K*3l@@bWAf`(9#b7aBg3PyG7@m%7RP9t*l z5MzHugLErXq&(vcUp-!d%uNZC<3&mlHG)Fq$=qb%)}eb*iwJ)vSO#towI8;Kpn2wy zw6sc)dSyozvqrAMk%8M4p2_H{iVJ&#@({jqT!cOGA9O+^8#8H4@a}B`cIY5GmPe7! zCC_$6FxM$kugC|C&=-S~Y;}b zME_Yo2jIi?FX7xAA$b7i=CdFy33o~PUKWkdKk8i2qoJh*{iKR{N?5GPr%$7=I$3(i z2u-;&`82t5W+Wi5&x}_TG{}XPqaL4t;<>{j^5KstKw_uc%kMMhN|NM!`^A{>LKQB|53EmK;MJ*QQN%9 zU0^iAptwnRPtbD{1yoe*dV&@(r3-?)Dr0KK=!=?_eKOR#l#g%GlxKo0Ch$Y&Ge zSws3-E+|93_8q~+j+ZV>I4&XrmHj~^-;MFAKLUA+)8JX$-X$ocbjp8I{EIM3od?30 z2pXPn=mXCg+ya7LMlmjl+xv5RE;kxbX z2urtADEK%wY#C-#7a5b$IMWHKQ>ezUGswM%+q~&J-R4OaL;>|)O<#40glk^CHKVt| z@>nIdf{}?&@s6}zD=;f^=n90!Fc5R01ie0I;pRsn#hgO0y;_I*cxK*3eLNflfO9!s{IVYie@ZvD;V-58*@XM;efY>2FFzBv-RVW2<+cA6jsYHep0q8 zJ5!#ANO#cV7%L*y9=I4WwFA<{V9Fjp>En-aq2Td~;h~UP!LtBJujH)g?D3|WfbZLa zl6o;2V*+n+CC2;9dY11oNGbx{KZ|-cLGC}#JoP97e1ZD{$a*1ZO`NyI$x@zpJ%6TU zm^$%8EXU$yBe=FQ64OtVYlXb_Of1HY$%g48UxNUIaIkc2iNpnTEg{Uov?x7NiZ(RZ zK#&Zra7-kH%>kerz=+hn25`_`yaj%g^uZDWSlxx;0Ft1=s&N1;g~LP}Ug*Fc+5n*p zLEYzq!ei>_T=BNuRj1o>KE&8Jsw&vqSPCzE;B4M4j7HQGACV_&jko0O!+eX2Rlxq) zOKb-3avm(f9<$FZAzCMvVDyS!jd&$nX95oHvM%811)VYLq+XWDi|gftZ_k2ASK8U>#L`J9{JF+`MVSf0hW6@;?lJDi%Jlnp-cqg$6bQ7_|9 z)|iTE(unp!!5l)`Gx$w=5ys{-XoE&jusqHG_4hz}5wXm#kp>qN&CW2!t8)$O*5Qdr zl??a{`P|gMUDbj$Vh?;kp0yhT`mX=wK%;*p2Z=nM+F>l)<;Ebw;@G)=42L~J0Y%mZ zf8Z`!uJ7-Fk?p9iJ?~v|y+nqA9tdg;`G_l`+GEK62)PvJ21az_t1Q zb0Ped0H};UQ4myP=g>ock(|-W1j)Z>Oq;BD6+~ld6?12dy%8Q{nB0Ji;eK_hXO<%6 zrpO3Nz+EwUlbIux!$tzKaWs29hV@`NrrxtU$f~@H8~U7s$_0Sh<_dOww*Yfz?DDFG z#L2pLEFg&dV^h%`jQ8{nF&9^{p=hva90+ZfOx=}^;~OSzL~Sb-0_ss7MYNOzhb}uT zfmvILGeZZF+r^rqJ7`q8QgORn#7QU*dADH!4B`;*D;f6&3fNkJ6tIHGV{VU_#In&j zqwHcyXofMn8NnEuX$ni?({28iqZ}NAma&CE3^4Uxj~TwFQ&OoO@>#PmaS@jF(ZSFd z*>9h~%#86^PGY9TthF-mnPWKUxT;Jr5m`%04UmLa+M#HMmH2EPtPxXcHdD-YxM@l% z&PJbsPY84KDf~SOXgYZmvIyf!%%p{ 
z&^g0ITDoXGN@guJ^f{GRm_G_Bm3lP0fB^lT&Z~ux3==Y0gddldB3h2uKns8Z8rL0l zEx_n~a(fMt)XGM-)G^y9nY^qR^~~Z*77Y8nPfE3TP~5l%9fP`;!=uf{QU|eTEW9E|N}x^DW0w6FPlsg=-;M)1*p`;!61> zCCrH=d_8#G)Qu&C^;iE>Ad~p?lHSHh5z&9S2T;i4JMqs#PEj=^48fdOg@gBw=ng>e zFf>c0a6l8?;W|90A8!((C}V+-deD=Cep^^Y*8nNu?9apvDsD<+TyiV;>c9y5@f@+Q zu7Vhd$TX{4ps7nTE~w3LM#w?xBUgSs1z)rgUN|I$5dRS%&!;F4C5Pf9e1!54Rh0!`uJfvbYQYlb!#xWKYiY)1*asC)412t=V$7XM z`!|p>=-(xn{@?vZuv|!eGlY&2V23na8neZ>0pFv~iSNo|ORPA18f#FCjSvp=cq?Sj zFlii)2oNiWWjL*E1cUV+U|iB91|YfVB$)f@B(9kCwUHv6!|1Pmk@&kyr;j0w(2ziI7{#8(UiwC}#h{XV3E;Vd}H1s5W3+~n2x(ecRt;Xaq8?M7N z<+sr1%&l(7c!Tj)aI4$mik?7OE5zG*4ad|4YQ^U@u0SeSD{ypNM(2>#5fZEj9bg@? z6%K_!8MskEAzDu;gx~8a#wCNB9fjmISwVM@*^6Gu37Hc%O3mfe<9x$o34xAt2XoUp zwg!u#gs$P~vl~t!wA3asx2YLxcsl7>3_f4vJCJ5T@XSNdI2m)_fJqh0Q)|`^Q3+1O zAcalr@mBUwT#zEs8bav)e`vb|qPI9->EUKD&y&NmZR2UpWr!^kHQIxefeH;~qVU7m z87C38qMI@JU?NDBC(&Rofh9PfH^n*7gW@sX;8!X3_RwGxK`VA!Jh!|7%aKgO)}$Zd z){U93FBY;(;rI4Z6=P`P8A{rI|fVs8s;8}QN=s1;DeDe!K@-V)3T#E1e? zOv+(pjx&nkk37LEe4 zm&?;**NLWwYVq2{gX-vWQfGO(m^%hUFOqRavYoWpln3U&CM*};6Oao0)WHFWp_%wh z`nPxi5Abtiz<}%Lj-ikO`2ilZKy&IF1Eh41QT9AN;g(`oHd!R{>zO>A zuB=*o#TEyqB;vCnKC}>IMC+0SKlXelg$@$E@5+LS$TSS3!baz%h}D97DTlKcU~aBF znqB^xd)m9x@t*;~9IDnKhw<=Ts>FD>SX>+xMqNYD0PmMvhqN?kkJ){ zoCu-Lh*pr#7bQ#lwWP;*m*8#Ni3+PIh+X5)fb98EG2iRzzI%v24Z|Y$Q#SE@vAHpK z#vOhFb;3N6E}^EGjbTJTkr|~s22x^?Wn{H@ z;4KS<>+W*9b*V~30z8H|y#%f%L{d%21!Jp4MU$7 zP;^?KZnLw+WS$Ddi&x{qha^3I=-;X{8V;aqC>cIwN&Wz8kTNdgNt}n#0x2!42y$Zr zN-e1sutr1;&0rG62}@-EunH8cA=tKrU{zuTn3J++!pTM!MiVCpCFJWs)}g|}6@(G+Z#hbFXYjKTrCyVR?KyRQYQmF z6X71`l@S-coomFT;#qF*SkT3X)pm z)2x@yAfE}04L4&pc6hJ6(Pab~>5m;U@o98WSwF_kkW@D2X|kyEjy?g1qU*LHjCuaWVO?FgIRlz2Mb7;hUhgglc$|mtirSZ2qkQ%PowG z#1pA)Bh-W!52Rd%S&GD-dgLm1;P_HwT6OiCz)Y^*)_Cgnyz7e!(sKwMtuNBjK>Q|% z<9Q!OLXf%|C-=w=BM{%z2w80)sLe2Ll)BJw~*0HiSL+V)GlL&>myQghOYVf+!?OC+KWj z!I^S_s2G0lVDuZsJ4lj?TFeKrgnIZD(aajm7i7PJ@XXa;)+2s2j$!{IL;@0Q!HW%O zDig24K%f3Yzx~0_M8b7%k>oLEO(Kc0mlOk|aiH+UuoLM|qvguE#gr11aM@iQBbiYJ#XT#hwD`{`UNPBj^dx_ 
z8sBCsJa=k0p^juEI6VVl=+cXJ!feB~=-(~@bO#5hoq}-8DB&CmzAYUIThL>Z+n~d! zXArlO#{iQ9R8ZT(?GsRNkOCT$Qc@~AjwF#&AwbPF}P&nbd5d`CPszE0~QGZ+^ZM0UBxNG_Ja*u z5Hj&_5herBKS*h;gP4II8XA7{wz(y&EurkBM))SDp+Ku9PSeW*|4xwv9mJ`)nq5%Hf^80HD4ZT#4Qon8c~px!Dxv3>%>$BfLyS^v z7>#t{Rys_v%{*NL_}i~N;w8N8_p?QOP7;q9o>V_%9|e?4Y~gw&gISML4rbFrl#vCq z#K=saF)!+IYrFlAqAL$%`hVl(8s?rWE6gxwjuBGMxo?uI+-EX(R2su#YB3p>9K($UJsX zQ-XCpp%cZpt1MDrNe|HpCknLivJN=&#;Y+UU}=U~xR3xaHFYEy7)y?SfCL+_8w)*o zeP{6;H}>c(|J%b;g&@a_W-bAT498qPVaDF^%-h?T5|usrdEqE}jPHYtGoU?3b^>C5 zx_1qrnV;%kezLTv|4Q}Z6OT)bQ*QSm(kwyI%vmW`X_jINitX4~_!zeNYpI^NqvUL+HtE&s}vUb){l zUc0~LuROS9!u-v7s+3R>sP^QHEjtX{oyh3Tm5?SJxw<8N+%mf(di734puF;hJC|+P zsbg`nr#V<36i1YmHlNvlBTUhFD!Kz1S|@Clw3y#Ha;~G;J1y}ITY>`_%g-mxBm|^6 zjM+ues@s8gBx99Wu9>-K>(C{Hc1-fgw6*_K8#}Hrs%$``Ht#2Rd0qf1V6~vRx81L- z-co`2{3HrkH3W##+f{H~s@Ad!&TLt_A^?vU%XETsN(2tcerUgFfaA26> z#*R;!k-(b58TYM@3uYdDh#H_4Chc+;qZBt&Z-am3RQU6ed?XU%gv9ztg-N5iW5mLP z-Q+if!X(GHK#KCc)-j(2(Zo1b-XON{!1ZUR{Mu5R`I~En2NDW7dkULUcSSV$jA_bJ zFD%~giE?>o0|Onk08YHA4)p;i#>v|!0pCl;X};s-%0?4dBIUi&W;VcH(rYw z3+0-md46e;&bXf?eH}%Y>d1RaL9;XafntPkz*lirfnkR^Q1(h_5z2VMah#NM#B=Z_ zagyfyqrDnkiuieWMJj~M6tT+L4OH`n9)9NttlnPK%gj976Y+k>aJA+PWbz#hlpNK9 zH~}|`c~n-jYRA5$Xk}Y;bmxmj1L3G9Xp*xQo$X__iSH5WYp_DmEk~>(^N;4-%{Oly zB?)e=P12m+Z;egT!W}nrkFG}5)>_i3eWUqx=(%cRt}XgWg+!U{#ie2>idmX3YC ze3p+i_F>S6ODfcvB>uY<2&VK%7F3n_>m9GLBV(^og)jf;fWsKY_>Ouj1|$WY6K`&@ zxwVZ=8R+BS#6%!-BRbMl!`6;4)&p#Y?%I<{nj*Bw%94!>R#MIBakjn%1v90kt!<}! 
zLz?*}X|2d{JG#eYs~sIiv67#r;oYWk7`;QcgzGT9EzxGoUpWEar`Sr`$dehs&Q!Zw zRgu=CQ)ZIXo3)sl!O2+0-I%FpMyu>qq%7<2YyMX#c7HN6zlSHMD1yRPb$VdUh9yLw zV6uz6!24Mv3@DagvUe8SIiU9Pv-sj=oM1QE>0Xh0?n2Wv#mVZOS~e8yzCG3J*Rn(Cx!9_1!n^>tleA?g`dY8s z(AB3sTBm3-U#gGQqW|ic&!O1rYUY6LDF*hEqhKD3%!#w^yProfs&ctYL|LP{**mw6 zc>L8*GBH1G46N8bU%hk~^2%PE)tC#Cjow@SRif$;COx`Z9k^%LM(eFb4WwJfB<-|a z4Up3*BDsYFm<(xWKTUIqDg^3@Z6QsTOp>6Q2bS^ZSU}y;R#kq(?8yv`Va*g<_Bywk z2~(a@2!XO{_Z8=C=tTdzSs?tp>=DV3=M-(?V4QjbX*$KcC7ub9&RFE(RMTKy?N&a>zaE>rzUwq_<^R;&Xa$z+Jo10L!aLpT zY|?d;VV0|O_3lv;ltwt>lU;i})hjY(P>V_b)fL9r^Lu?rL8C>#cu(~fL$oDWG`B%# z3VO3_MR=-L!7f07Nx~(gx9BRG0mqrQ{E|Yq7Li;5(#&UhXS4+~E_0@PZdLISnl0G! zEnnd**Dsxg46`?q_wCq^N)75T$~j*ZSQ0@)$EIngzXtvNf5z4OE&#m;{{bweGm!F4 zy``)P&2Y4VJiF7Q%*ows!+xmYqtB%8FX%8!mqK0vg`x^gsS~G=M&qgovW9ma5s@EU zD@)xfTPbY?jcwqSaT|8p+p{Mg;rt`7?aY4`-oYvRRHa$`8H)npHt^?(kV9_0KPY&{H^I6ZYRCy+(^?xEmfDmnVNzBk)!g^Fv<8JWr6-DW||EpCl zdW4^8JpNCUb2E7Jv1VWKSV&$Z?VpDQ&zDKh7rW!_@ZlixI^KX9esbSQ>gl&Mmxo~5 z;NyUu{ml622no$m$pWzB&%ySp3Xitmo&|P>ZqKE?mA9HKj3TGGNA#;?#$66AK8B?= zHT3(g>=(wHkZM5$;CwhlddTABj-wS3^xNA|LtJH8jAtlu==f;6b%zFsMoN9?Lfc3+ zu5~b0QK_$(pi%lLcG&KK4i7K5>sNKZou>FS%PvWLx_!g1c?Bq5nIE@~FCo^;tdzCX z&or4YeDE(#eJE5MA>4N_pkvY?68B8Pki&$#X~%K+0~b`kR+USsM1K$UVjRtX>I>0y3QSqWOK#{1B{)SjY+O6n zRg(0)$36Fl33jt9q^KLBFGXIKsxbJy@!LP8w{j=j-^t1NvtVfrKY108qDK3yz;p|m z^2v%fKjL2}26RljLp;Ivm%Y72@Z-~j-;nzGc#W93?EuG@$6Zhznil{6i(V;=Fc*J$ zZaWd3dihZUY}vcqRr#bC!7t^ZsLN=)X$6z5uzh}hn@{IUh(06au)4LNmTM3?_ zgdxkR1Mt4)v(R0@O)Jy#RQh&swmJSAX)CG*4KwS7@#$10 z8G?ql&5-`8yj)zXqcNTEyTBvmQA$$-Z}8lv|J5C_bB++FA<1TBQre3D-G%}Wbe93) z%C+WVp-uf-gGLkQ$C?A_=EMK77V17(^`M)0|^nKqkp~O zOFOOTM1z;Bt=2}?6A;q_d`e|HvcnPQ-+L0`Is5$YVRhOmaO8ugGtw#pH?R?xItDvT z-ER~xd(Tdk?h4&e{`6$@aA~34h1Q>6`%cG~6jXkpi8Hw@-ifUJD*~t zGSF&3^Gs<55BNrQ8Bmmj50X~<-PNI0=)LZIu7Q~M+q8mf%Y~+n5ag3RI~E-yhiTTvNQ{ef_j%CnY}KiUzxyL>>~%IJ9W|56I(AKM zC4@%w$23U!xXpj-w7c799u41LmQo{~{l}#wI04z0JR~rndm^Doa0VaHg(5ZOt^T+X zzw3k&?~O$%JP)Z4wb8fHA837An)I{v>`7Og|2bsZ@IlA2T2oZK%XkHL6Q7HZHVG*M 
zj?aw0Jbduf{oP^EpQs4^W8;z=4*Y(t&-stksBW@TNMEpjT_$-lprr)?qi61Zyh{{NO^eNz-m;vm7C!QDNQkfR} zv0dTB#z)R~`EoF?O73V&=L|Vje(4IS2_=&k4*Rtl9wAA>t-t!Iq5AG%E{ddr$vxN6pwtm{NY}kDgd`sE4GU!rAqKd8f zhp4Xgx?f&$M~7*suJdHYOBiE>+@<^1OckJWp5aDg>D*TO;PE0PO0RccEq{BRw6K;8 z`WmEf(y08;OiZLIpJd-B|8n2!%@0oKH$TvE5-z^lF8lJ+{&w0+dG8LUCngp6xbty8 zRozu(Nbi>PL?YOg7$JkJA$<6|mliVEa&~ScvMA0R`LAaXEl=8Jz`j*cN38vUTg|>hCp{H_U5MrUW z4qVFfVBLF&4omwoWp;@hmP1;h&5yXub3@VxOW=@4bn|eF2JydMIGvkmmNAhH4e zG}-=0?(?j3jadQ?zk`h*5Zdw#znNa_^!P1X$Am59wgR})Z{wdr2j0*O_hu{cm$4z8 z+PVLtY3I)F-|Ju63_Vyi$W2F&j6@3!?B=G&m4*r&1XdhMZwcAa z2LRXa&kZKcS52>)HyH)q{q5hs?PX|sQG!6r8Td^5mS&>7G=skQ+(Sl1rLpS!2hVGb z%t2kStjaC^Vj`^GS{*mbqh_e4sMDmSb}UXQb8Wx(&ha2~PHO>qhmWG#ykh?yFYgY~ zF8xav(k_;Zdslewi`y!_PqkW5KCW;%^1;c*vwZ?S`rbL%J;FuG8lTNQ=>DygP{F^0 zA{Sb!*T$r)#Ov2?%U1Q*J#TeamOQ~C`Xd4p|MoulX~p}8^(c&JrFo76h+}xYx|cmG zi9fyWHB_4(uBoZI8l>Y#U0-`TRr>WGueML){BglG%T+DiVY8{Cf!Vatz0)-7`CIL; zm+V_k(ZMCV=&4?gOb_4x*R|`~w-zCf?JiEyylwtC;)UayWSOT&Wy9o~WSGANqwk)S zv3%(XK)L6O7Hq$G4*N#Y)eO7p+~3Ea>r`{gsYu%*MMch8nuO7h9eR=K1^T(U_lzs}NGfK$OP7ki zvr@;R6-^zTL1Z>yZsAgyziz{|G+vv(W)=_rIsJbfmTdALtMPjLT`uLPxL8_iAW9 zJ<5(kWsGu=ec2};n9tIpIV@&+O&gmrqMs_Z14Wd1#DO~Y*$AMTC-IKKwiYcv5<`SJ z(LK~)dl)sLPiFk;6&e0qd_K%y4oXF!!ZR~PQCL2qvL7HA=4F;Vr&m#EzK^JUlU;C! 
zfW`?!{QXd)_(8v5LYomRY}+4|+0N+Yy2r_q6TRg?CoGHCV@hV@e+~>hM5DH{!H`e1 z)(85Vse%&WbYTuYz0K5og*QFn1Iio!`GVJvSVW19|0)9p85p=lSw+g{apYElVWz30OwV5jQL?y31C-Q z@wu}fy9*tD=jBIcyqFQ+^*?z`45%qDfp!A9I5>CAcDQ}$?|!<{9Xj4Lqcc+eFj|i_ zAImsnZFP)wzfNza7cN{D#xxXtpv!U<1G=7RT?2|#IW>>b3FjVTrgU_Eos!cDVPoKh ze^1{Q2xdpU4%-gYaPeWP5Inyi3)q3>N^k+Thlo5k*Md^zRyV50jy9%e-#lUbJ z2QluJ#BTfR`#G{bb&BfQC%(L9Nf!=tvU-a+Nk<9h0o#~C-)%FHf639huX;7QGf16< zSq6&Hwe7EYl_WAoWK9>LU;9La8oPoQx~Cev-+_`RnnzwpMprk38(Z_6{9J zFF>xDeO>bwn`P7&B$<~TiK{tb?!$e@FtR$>ev6(dp(*EI5e#g5Z)08)-4`)Q^MP7g z&~a+yYP6!FCAMcgNP3ROhPlv9ID59Ma*oaQM!XNCN3`dBFjG|e%DlDHa5N-p@u8O% z;K6o-d9YQ~Ja**|v*&5L=6TG`DY4z&Tbfwr2LXO_)+p+BRnK8{{sp#|&$)~qJoCzR$%UFNyZgvKetIUUhirq?Z$shd8OGytzy0_Oun7^p7Yjt4>~p2 zf{Qi$OlOkjcbofdM;7VmV!NUv-LNm`?*}_RM>U@D-~6&mS5|E3$zA4oicas>oMa1KTVj%mF5SU1BdtKbY+N*K$NxUu zD&q_;|8S4P-IC>L0dGX7w;yXnKicBl^@j-myS?ah{L#^n<_)GdL9gv-JU)qvsS4Ab zVRP6PZucGzlpO_v>oP`bb47lCrI*C~+LQEn|3&d{r~-mSwvIO55yb!6w)bpaIDXwfY?x2iw$ zLIX%EV=)J>$+ChS`xYpqD-0w%3IXpUuOG0zD0|GC`Rpzac#F*k6WHr)pJup4hYwxHH;<;ddS3hw;@_hJ71*abdQZEHhnr7V< z_;vdz`_J+_sVhd40K?<9ab#_n>hBDok3I@(aE;F_xpb%o$Tv z;`#CEU52J4QA?3g6)>SW;lH6*UQxcL>osoR8->29J5-WiG^`e`4ya*wu42=j@Cc8)IcbeMv9mC;&SaXRC^@^%wC<@my7}&^fKKrxX*(fi)dnv zioNA>5sKel6i?)1d~dDIJmQ(b3ORtc7C9h5ei`8^utlHcP|w&!DS85Yp0jm~N7PJ2 zhM;mDzd~`lTHT$kMQ+@+yrYyCycBe6>-DY0*`|~&{}VnLR&@3Bn&>Svk$~Yzntw|h zcfIdcplzXH4cfwIMuDLvX~z4yTD%S&9`2P1&`mVF0vV4$p6*jLe~*VsOzE|z+gtJ? 
zmR_~RTEd7T3G?#+t1UBIAg3pn!a2vAamHLj*`DpHT?Yl0z_B{W_Tv1ER=S!wlTPtW z2ZAM+cXy5im$N=8N@n(GW#J$)tcH9TkhEMqRga=ya$B=$Xxv&?W!moFNj8^9nFVallAc&?{xIk0s)FL71-n;Qn%HxPg4H$b%4T3tf)6Q1xP|4^ z(Jg;z6}4*I3l#mAk-5b?fa@}#>DXQ*RsOmkCDETzgTcJ#aDSuZ*T&Dv^0_7>vRYpws`E}KguInAlb}Q{ zP#Vu3-Fg=-C=C%~MwRFeAGg0Z2rQ;^k99JiRom|1dW1YJAL4q}o0JUf)NwuI#)>>v zY=!nhlNwC9Ptma&wt|w!34p6)2qY@^N@RY>T_7H%=nd!AVN`j$xVHR@dEhVIEdH;B z07oSVZTqX(?&x&hy2okSw}KVr=nml%%nT4`x~^smqs7poE|Q(m#?|cV5Uw^B3fkXe7<7A1AwnQoH^^)@Wm%JytAi+&}!Hb}vg!s_&;c*YKQ3)fa z913SIEUDFqhkTQ?v=z1j8N)1yh?O^`h+lvzLW$QybkT-w^c5iHn+eD}LxKR=#PP~B zlJ2Z%eafmU#;-**>BlC75P?$7I`)Q=0s& znd&fG-n+G(f%AnH!r~QaS#PAdf#P~9b%;AdBYflo%|zl8!?lm!DdX>8jt9FHwZPE2 z3weqb>G+b>BiSd$$CMuZXwDXMdu>jCqcBA7$<5?8VC_D%+rg*_d$5Ce<7a6J$dFlD z*IX+`d|`okwWeXdSH!4@%BFTUqNgOHGIj^yok!scyuNg)$=pV?gk5kWM%!f)xT-=& z&$yD*EatdPCe!RS@2>w`b`?OCS)F5#ic}q4EY501m%g;1q6N2Md%gK8QS7MyzWNsr zf9(aEzw60(p!nA?Pc@XD8ie}lZxi4^#1Wj%*MI2~4Vl1|3`^wy*E>MM?P zT=2m(2IL92M{#vi)|rEk+ibkiMJC@M5Y~=ri*A_}$v$IL4J-IDF?s;iuY4??{=DPl zBu&2^aE`og7npaW0b)?;YuU+OTi5fL|U5o9dN-h!=L8VqEc%cwxyt{u--QgWm0G1!(vn*_mG6r%y0cAX0a6TlI9$tZ2i}RJ>lk#-{@f8VETL!uI#8G{@4s#f%geBwLqhXK}V)-`|I&; zZPO7BvD@+v4fU80=0M;{k&kCX5%C-vU|wdRN8h~J7(bGA_sorLdNOBLk8*XcM#iYu z&+647VU$yvOFjbJq2wdhk{on;Yq4l6F4aIUw#=O+rv=D36t(|hV9W&`qWjVYKJ;=L zTThzFJm01x&;MZly7S(Xt(P+e^g{lYS&f#=gaMN%M(1!MQ}jMS(X-z3uR12P7053T z!z_K)=gzjiM>`VQ3E5qIz!j76BDWN1U93o&#mO0( zOvPk9y7pdQJ(@?>ieD2b$p7w#*ITe%#yn(!7RW^>o-Elj*?qS)?CI0(ukX~cp$6ZS zSfTa1c64>O%*h??>E6o|&UNUEGHc?~tKst)5GP`WcE+}c@%=5w1?FZ`^$bP;vBsEy zJO8e-;#52Lc;O4@9H6_RuO)om3e+<`edlOze`=cZzPQ8NFIdIL!LjjGQ3@Uj&0GHR zyxm@m?_8%5KS0ttaU5;p8KYD@he-gODTPV63+TWs+ZA6J$E7+3$TLp|z1tfV4Fh?a zBTfmivUPFdCgO^T148n1SB8R;PAjG81RZnpgH^Zx$!GDK&sx6UmxDJ^1kTvedw%e8 zB$m~z-w~K69PMRU!u#J$(Og~&ln(&K{d0<537Ph{70U7w&85}7ISNBW~m5^uWu2n9sq=GP$#Ftul%D|6kGYw_i2i}h~+#$zr|NQz&q7@ z?%cDy2Hi{Zw|br`I`QMY_J_2JF=8+5-$*NJ3a|>0%-}cV$=*LiUK z%UL$XE7sjJ9__nBS3lFp;tD>oML#cdnK{p?Z_lD9HIax4Jo^&&@zbYvi&RFCf13fbZdWSc56n2$J}xmmmLlo_^mR&CE*R$n6qW36^| 
zEpHNYrSb2UKPWBg9P_(XfeHmf)0wnnxZ3gyS+nP%(eg~%YsZ60Gx3Ng!Yq;cJ63Ej zM6?{hmMWyyptC{Ivdln^hrwIyY~fG2N0M&U{#$gDLjl(56{_b}pg8`v>_toZHQ5;@ zh8E;%GAKjtP5x0eThLYt!{E($kwn$sqEkP>v>IXCfhs0mcNv&Iu(~a?)aIegcHrf@ zH5>fhBR&Ps>kRZb$)pzJ39E|*xIYr5MMt+Py9yYs&UgXXmu)Pv>lOno!_}t9LiY`6 z)nU*<0hUZ}uyUJ1hX3F??=)L3Ai$jYD^J#ytn|1mgYn(*l|3B1yfa0E6viKHRSA|C z*s>{o%l}N#LR9j_N6L*>r9fE>kCoRRC#7>ZWjc41LiPEa>tsxV(RJ%17;73URyaf>}Fb#ys)3A=<44*}FWJ9_yn2I2oqvdbNd(FMo)g@+%X-GEi*#5??%3@>r1ukM zl@e>Z_kzri>0aWe8CsKR8goAUjSKty{MIz>RM2Zc?v;l%XG4vyY%iX7aAbo93+vI} z|6u>l-=62p@TUSkMhGx9sUWZh>#};VIXkiv0Ha#c0RAd3R%59e!|Kr~Wl9cgY%~_Y zD3kA-03w;gH^mKT%GaLtLjKIW^Y1-u<@oc%J_j4LLy$~lM?)Ng`En9= z`?YBKkFeZ=qjYTHV-)+nR3QaL7Z%UCr^!sswW$l3!bl&p>!4tuFAF_TY|q<;mMHqO zKZSZI`ca!*5e2Per-YZwxV)<5Ajj&pu!a~i&`M|Z^|txPm2&ucJfy~fxreY$J$IHNhMcqbPS4FhQJZhRjVegAD^#`RlVA`G)(-matX zjmDE^>egHtj{O{9hL*w&&B@;AShty88*d_Weq7FuC9<;7h|#QfrxPwNF`r=-{MmlN z9>u|8H9dPj;f-uw4|CQ)A#V%DATfwCon4TTn8^iNfCH+*3KRuxtLo9%cickxHa@#& zJk%Lqbktf+6sW`KiP|?!(G-)5Rsgh=sdyg6mI3wGDoR?iiAERgnSrc$k?krFU)u?m z*%SwutG$5n$c;C3t6Evi(T?q2?Ok{z;%6dJg`P)51 zHTgp3?Az92rQBS#po$Ux`p z?JshahLN$gzHL6Lc#>f!()p`;zgF>BQ<G)Vq-V+u`HH?7?g8=N(0)@-R0kS3>~;ZQj{Tu8$W#UVD!IM*KWlCysVS6 z{%YwSa#*oqs)Kv#0HXhFB+=Dy%E=vKI2>;IVZftCmEqyWR0MwnQgVMZ6$f<9 zv?FPd?8u1xP(20|^%G*NGahD#H`-RgMUX9$H610S^P|-8%Ju|h{Dyw5Y7|j9Cgjto z+6nt7rhSQ;pVyL&88thXSmI zbze#ug8Ib&^Fq9amc95myv)|a8e>-JVML04qfJrOg}X~QO~jHGlr_d@z@DO#j-G=^ z-?DK}ZdYAN$Ny%;$3;)f9B?~#z#UDkm9Ac~Zimo2S2e!iH@)52kV_adzd@)gnMGv)ILDaLcgk}oJcA6Y&W$(2&hWbMvf_zJ5M{= z#*Znhh1`!lJan0c+q#W~=Up_`1gi1?^~72qX?r8*$ws9YQpJIyEVGd~q<2tWxo~LW z4B?q+59oMT1a&n%<0x)zr5E|1(m-oTBuRygRW-(I@y|;*>qk+kh&R3|+Txl6i9@h) zWC3vd(UpI~m7I00`*bzkkweIGpkBUx=%wgHg&_b!OKv3 zzi`cDX>yaeWCvdmImA7703~ZrJ)1O+t+qPUB<;sJw2^E$aM7Q z!|EG}uOyHMX+Xt&w>4ULKsP(wq0Q%`^>F;ftGjvFFyKS5EFM7`rzD%qZEB1{pq_eT zdugYaES;YK!{Foy`4I2|8bg#FQ}e|s%TUA0Q2Njr9pJIYx}uyW#8Fdvw~V%JDr=3% zU-GHS&ql11tt}*!FK8aXN5qB`&*&oE#WdX$ha6@+G;1pZ6lGHg3i(70@3N_Ym0D61 
zVI__DC6Bn0fALYEYp#B!3WBnRPa!M$Mo5ey)Y{C})hZDJxUKxkg;bJKK6bmDAln=2+3clWo=PXEIhZuX&zJ9H+v;+U2T+wlD(zr|KaW%&LRiR@Y$Y!sKWKH$JM&^FImwi36v;8$?yYoHuMApx>B|NEW~y zI|tj8l94EJC+B}e3bOa*vBRJ(}(4*u?^x|zyeDoK3_bD}C6k;C5Db{d{NpK7viJ~aj(ksF#6x@&uH zTzpKWJi_VvI?{PU0_;2u?Qm=@3i2b=kX{f**$V}PYpHe(usJX`f+4DLgeJ=<~Hx!LLdypky% zVMnPi!xx8wTUU%4`GRl_4r4lFiUz#^96l z4M#bu-5G;+y|#J^M7;)LHa| zJ!FU`-AxQVY5M~@BB_BI)*WiAHpSKWU^k4&Fzrw4WOAb28s2~)d!j=Fmy@wuOi;}^ z$iuDy^r;A&VbOtpD5XY?9GX_S^Sr>LAl`urlw%Wz-mP{IUSD5oG3}khtR)-US_v#nu2fubfX>A%1Jv$@~$Q@otr$T0W+H zRQ^Cs{8Ez1VZ-|$vLxla2qIiJb}%8W!49WJeXv~~F*yeJT~Vr2p4+bPaldY{0CyGB z)b9?-t%=6>LaB;&5s5?UNya3fJgG`WBT8NRH*ZgVAy>Dt#{AI7%KwaS)0kg$QB#ny za=0U7YtalU^|lR z%q3S>W2CQjpB^yb9)lfE*T~=$(n*_nb1g6Er#>9`rnp;m0pv7-Vl*Kw^^E&@>H23Q zBk?A%R6-MA-8Q_dsDl$NP3C&##txqXF1x6TttMkBW~* zY}ip?c~qtR$kT!7JBiElJ~sQw=03=QcRj7~fC-6n*3y6nBu zb07x+G{cYnm>!6ey}M~gL6rUQBC7RiW2e(7m;#Xfa+TGoMk;ml$?D=_+5-XDO=I$U zI;df%bv8{p**68fpy@egJQ#EHoFS~cV49V%^ z`hM7tV~G5~CVt#TS4hdSJ$@mj&i{Vm$qOx}ztUVzn^NoeDeJ&56k~GCw>#SJNK{$V z5glkp>@)Ghc?ltVG2|$~LUcB)j1V4o;~q^!qP`M~VEn}JJj%J%bi7*XiSR&e*ADI> zy#k>5Q&R|NBQc=kO|kVr3eCzh0qUruIV|Z2p9u)L@J-EhO3Z0OFVw&;JO-!WK-et% z&VMyIs;>3R8XoI?i*s{0Jh8MSEqNc4AH^8q8g4vTodF>NT zI|S9n5k^FSlt~Idw?A@+`(r_N zgy^`EEu%J{Eqh|55jB#Z5-O#C&T?gM_7eJOa3XYE)YMiF`mx32(BS9fmm4@ZKRF%h z9I!UpWVLQbP{K!cEENm%#U1vw-XKPRh^t*8ca{;Pm!Dly9^1ET?|I8gIf_At6QQ4< zRFG4R<$z`)+$ez13N9{RPmj>KyDBs1ne8Mot|~)NBCp60pMOe>AqA-uG)U2v_4#ve zEvLENrw;KRML!6eR>QZk4Zxi?M3HM zw2e#XSLLC;*gD|f@M{~8-iQqVn#f{nfzKg$Dx5U#q6chlQdwVarDtHTQvgs$DfMbA zG9JxfRHdjPO5e244SJy^-qbZYI{&I?hA|9ESTV*%%MjIWeuVp^Jmfxe31DU_ZQ}sv z|L>P+Qws(p>5cYBk+}8SIOVjBgdx}*cxah(**pJSlUY}E>2<#p^FQEmFgg7{pmD4l z;POG=XI1&Du|I$@^r=MTV7?HHr$A+Sdl`ye$8_9F~mX-Iv~-k)_+FzJgBrL9UGcQt$FBwKUI5Qb0~>> zsA6c^P|jI`e%nv6dzF>iEOK(Z2;8RwjH8Z_K0ff7AAk|dsNI7dhra>JEuOa+U~)$R zlTPL-`Aa#mG4G01Ghie!$tIVCQIk&k#)Qx^@}@m?+kQh_fd6i&08x#+qWCH93KpA7 zDC<~I-IWumW7P^ubG{sRvkb#>O(;fiIX;{IB+{#x^`La0Y0Y_h(*Aj z2}9x|g~^7DRh-$hQY3jyGp{VY=qQb)vIKZzp8IIRXn7%Tu*7V9hjS|6Y>t*T**0 
zWCTBW{w*pE=?dW))vs-Q=$~}fFs9ImAPaW|Y{rltaQBkL2z%_R8u<@M?ZglS0(|d~ zlpoQ;3pBNVH$I4rt-UAFADu=FIya(sYcr3~0B}dck5f!8x->@f4S8mxX5jWZ21AmU zt!H#XWys{HGy(t!Ewt8?dn-(GMxTtynvQ!w6Q-tJl=T2ospg2@kiP7x51!#&@WdvI zu3S<*un^VoO~rEmMB#w|kr#E0t@`SoWoB#C-arid>#CilWR^tnRIk3=7 z3cydNrsIR6Dz`q??Id?ALFfD@OtH~w0LQ(Iis+HuOY45%9)sTmQPw?OdHS`Gn}-d& zC9raAWIjIZ{)10kFvmoQ{iNp5%%EAlLVw8q*%xkza)Wl}VM4kN7n|JA%HODr{6DO>y@5Pf z?}Xdx0LM6*+UpOQxoR)L^wqN-jviuvq~q84scUJ(G?mNwa0h~#0Hv-gB%kVo_lf$| z-x@$YGq=q*P;RQ9MwN*m`VKz!Z3B7@ueO+!c0ai%*B=R3DIb6go=XisryJX#y*FPe zV%P~H)|G|S`0kFujuXDv;kTM7k@om%I{*$`ba%asMkN6p__doq;*@Q5idV`Kz|q5h z>tCfruOC+2i#5_55CxAo;TI&y#t;Cjp^68sp3Fz%RPr_yQc5rTT?W|J)ci_#8es*2 zU&yTkx^YwLpYr9P_N^_FkV*K0clyO)euQ;^d+uYnyPs@w-n$Y2GquK~wX%nn%xNxL_qXfhjm|+8Y1?zdZdSha!iFT72l6%12)ZKK=17JgkpX-C1 zNXiejGXn$_O1u9CJw{r1jX5}I+<&Tqs|8h302dPZUQvo{fvT>f@DGS0Q62Dv@%qsw zXJbR{B@>i;>27yI@M8$TwI5TCS*err`kw0_dTy$n+uj{e4Tu`(r}Aj2jl`?rRgCZv zQug4*R+Gaz|3GdX{Mcg>^Y5zu?wLa8UXACfVq>%T2RI-=n0a?jlCse5Qh%JWoF3m{X z6S}YDhYd%N)sD)S=bwA#x98y$WbyE$^78d40)SwbWml?vE@#YSUn@tH6RTx#QNn-` zKn^7hT5H3SA$>a5_Ttj@>psM=JZkiN1VQmI^Nl{-Q`GqYe;yZNxY+XQa2SF@Is>;b zQ(<%VY}4rnWPgv#O?lV|2XZ&OPfy5Ed^pV59b#YTlvK9jBWTdMbI^oJJO6I3T_%FT zJ&b(h6Me2TZ|-Hy@qW%K=`Q7y`6~Pjxes zIpM0~G>jawZu{r??*1hop$TNRnjckF1z)2UO%0a8A{gj}PzOw#Q^_(3J*@9I%dH<%sYIBgq@Gc|Bv%h{?h_T7xE%~!L481r zXzH$WcA#?>b?Dt79d{@A6F{$rSc#rJz#9-FnSWbfRUW@b2tSU4rB$X6=~;Pz6Py|i zNIjp`bF7e>7Qohc9zc)>84m@KBCmzOO5@Hu7n*wNBX!-!wui63`y7AS3HAB8iQ=*s z)H5DXbd=x`Bv$2y9&|dx+Xw*R9~JIKfB`@_-71QL$$zF z0TF=s3;g^g^f;>;3kWfsb-7Xck9=ee#zonKes#ue4m%ULsU+@BV`|ykIoMQ((*X#l z0zlTNXFI9J3QOvqUYiBMdMEmxSdWUc-_F)w)UFC~+4~%ip6dhRGkqbz+YTpM&zuz6 zP1cY*?Ybj)OOd}aTo};C#H=R<+$SX(JVuTJcx(o$`<0JIN$yrH;|An61?&7IWwPgg zOyd-{7%}anG;A2~upUm1;x8_lsXkw2B?IE2(uO>2Xg*a<+v9=P1DA~8`{N+U=*y5h zq4s1)H_J5S#AEIx;HXaUs2Fnq zF5E)DRtdPDiklj8=dl&SI9qX)dpkTS5B;d|pEya_z+q{UWxw$)WjkW}kop9GF{WZ! 
zP3HjZ%W63u02Ap$CboKz{k^B2?s`j2=tPqf_k?cM8)9*Hf4Q%)lgY1Bnpfy?y^u&KP=jA%+F5<`}qr=M2{(d;BH zzH2pbA^~>?AQjo+!uKy%{{_YkfIL7%0s$7BXR@iTn49aX<@rx4n^f8}_lzH9uI*!L zWK#4sVyM)R&ft@O9(vxru`74!6v(mg?_rdY!8imu^fDxu;)|0p^n7qH&0y(gt4aN( zZ@HzZI|vdCflnXmZ=+~#ND7|qOlx~yxx?shy^VK1bLso$^_}ka@+6l&w=oTGGI0y= zysuI|_!q@r-fMCbTlgU?{Ae?KBAMG$UkjMMg>awJ>f zGG*GCPc-sQV(wovO(}g5JOgzWLmrgjeDX-wYmXT7^$)~=jR`7+LA7gXSY_j99vwLz zR~En{qVS|k9q)B+Dc>9iPMx~bMiT(!XqB;`Vl@FmR5WQfj)td#@WzjP*3-ypCnk`6%h=r^b2(9s#4>fP}gXOr3*goE9kY<0%chmNC?H1|h;)g65~F+s~A zE0^Jb?*@$S`q5AP%7|J&Vv10;Ts&hGy)`&$18i9mM0 zsI|w$w4zzts6?v}&ZJ3#W0+U0Qxwn92jyGQM6Eq1H&0MPctlT74D*UrqNRx9nGCmb zz11m~yC^oi8XnOQ%wb-!LN!RO^-Zq#gQJvo>I|o*2!>%ELrmmSg>Wm6dHwCy7xwXT zHB6Qyn5HY%8IB=}g=W8{+>?`%4Yz*2wZ{X(Cz+lnh^8x6X^!KFU%jQB%coO$PBx5^ zIR(#j#X8f|4DoB$TY_>GjUhRLX1ZdT*+fOG1_`&+oKjBb`l&^Dr@rCsct02&(a;3T zbj2#ik`(dn$W2bC%Q+R9JYOcWSb}7_VufZono&zT)z(`@v;@a=#VS+L%)Tde$t9*M z7J24z#6q-H^{uTS#w7?ir2g0Y>p!+o-0O!)o^cZqqO?qb3DZpJk1sB z4AV2jF44>*8jhwJHuY9EBx>$_a7v0ISeh%=DCRW8x0i3ndwR;*a4R)apSsKGR2xpu zJcb}?u2?yjXNcA8czCBitvcQdhv-S3;ApN`XgZ!JCIXUR?X5+)MWW(vRr0O9rt_$T zFNeoeaTo?!$RJ}3Lqju@CIB-M05B*R3*xs5$A9bbm!#G{k>*rnaV1y3U)bbJ-y{Y|n@ zbjpW$`XFRGQPCvu!hUZpr(}%UfuUvkuyf1pefR^A#)ZR64{!!PF4(wvP~8^7gM9t8 zVT$m~qY)vlV}!tD0i2l@*`U}34R%RSIzO(+2VJv#+VLfZoRKZP+%@uys0K}rAf{Se zd-8^8E3CZm>8$me)Z`U~ZN-|nkDJ@9CD};|Ua-}~{zA~%-KX406+P2-I^wf2d(CpH z1*OQbMN#nZc`iEO4Nn+p{JZll)9)a2BvWJ0?Ird21>;jJ{7U0GQ5W9XlDE0aL0uvs zBG}*`#pRRxA?P*L-!K|?malB@COTmDv0EGj*z_fEa2K1%$1WF3(w}%+*Xpu|2K*A{ z!t3CD>ChgGwj_@*@{v8iWq7gso$Gk=jM@q3Swvaui%#@zbl@hmmj6A3rH=YYpf=!D zCX$rLif_?vOX0^9z8FFC%i7Y!ERi zPN-AnzEO+v$WhW!K)o_@ESa1o&EVeWvl{Zl2LVQCsyrq{efnqVYdK z>0e1wsv9|x)7)!SBC#0f#t}~v{1McR0~N9tbtS4WZV^WP~4W&E+*b(%lqGcl0%FNDT@4h~UY#-dZDr<7+(2;?s4)@p0h}fx+TBu$= zp}geDW5IOdfm^F%VfS-0>%9fuf{n^K>byTH)06&3LVtELR z1~*Z?wb#A_#D^g#5i=B=wb+4g0$8bu`^-TVAb2iI6ImP&Ho0O)!#VKlVhz%w+CiKr zGd@4IW=ICba=vM}$|qhF%SLV+0#pX9NJFI|*##^rYJp9wrxbZTHQVh-yV`M zCg2!p@}637i^@oH~gl!pJrmP(Y7uGGk9ysT}T&7(~;*lPe) 
zOfGgxe+&hM^HQbzFWA6zccRth5+{QTV@T6m!kx$fJ!MoQRzaw;tP{1t=;bTLd`r1)6K^&!ET#5!e8x}99a;Wtj!KE=ifso@sjII zagO0&o5G!+FOq^xTn^Nkv+tZRwTg}YZ6ZYaBfq^TZ9~H7i<(oO)XxHHI>P2oM+1H? zyfXX6`S|dQ@}z=NVeh8?T-SG~9xA7cp`gw^sa_ZvZ)olz61{(B)Lnj9-*Pi^4HcB^l@(VnDd>Z2uzsd70bjye0m-kA&|MB0=HQtdk4 zD=3PY2PMYI(goiHR4)I%oax-rbQ7`zR`L%N1dN=6-SlNniYGX{)41}6T9vsrl*7hK z%Fd-0YQ{<+V5fN;kqEds!Y<4k8ES;kq_GGk1$k^Pf#gTsn;Mk<^DU?Wet2|dmB-a& zM2-;yk!!?8L=^MCpnx#S3e}a;Zy{Ur9tREs2zFS-vpFFVMF7$zP?UTHv=G<=1&Zjf z1TxmW*bc+Rd_f-v<;%k68P5?RV@+Y{0ogB0uznbI$wn^)zlu^YsWJBh=MrQs%ql0< zanhoKa4|wdvss(;RXi;zWa{rnfKETnT+df(>C$za(Iu)^swefxfYlNLt|+4U(CM~HrpJt%{{6__~RU-en^O!ysB>&ph6pOlD&B5<&HPVCe*C_2qXiVyOlI- z?Mfx^I!Ms1_0E1TP6->47R2fc=ed^!g-4v|1YjbQYQ=z6Mfo7gMlkExTiRyF%a9Z^ z*il<+-YMoCaRxwQCJ)Y>0)R9XN|Uv`uEMWC5Ez<_uy|~siTJ<6XdDa}*K#7ESCzxG z^Ck+p`j%*yW(5zxpfn59cL#_pj*P8AoM6dCA1A@yfje{~S|0Fp+Ek9+BAaJh3~wzgRpgFVnw(zW9x7xUpR&x2(j>{36_Af;z5=Dkrdk8_k4O|) zUQ*lowENcFup5l&7r7hKh)Im4ZjErDlhoDf@5r?2c`h!b!hr7i2ON`> zAmyrTm2UP4Qu%+j$5)zsbisUp_!KcP;m#3o$^A6k4&b~Qm&>i8cpD&N zwnv%Lr4U?#3tObYHBD91nhfBl;RY^kw|MqB?T4iq|+cgBT8}62gmA z*KvjQ+(7#m2tF`;TcM>kHSq3MPgo2*Z#psx9z4pNc&Oo@qTf$zf5_c*yG>3g@q#i0 zI$_f~PkB~&SLDF=K-<^km6e{fMO-1NAOHakFMq8znFQ^4t-sEAi?<`O-6L7Dywgaf z(em5zI%7n^a)?+-;L~&p4(dkadLeUdAWAs^w2Dp;PG~r0D8|rPp9*bB2u}RCvvv{? 
zx(w}?V)2!RCz3w@CO;G!G+)Dn6v_t3Nt6Zo$z!$7Qe&e5Wu(J8gGI1WI?{cf?d zX=yZuF1|LidxLhBi5N+1(VJUi^yF~qmdp*E4034P4lye>U<~f!yB$*t!Qkn*IQ0V{ zh#rJ=Q7!t8C&tjC&(Z8k!z!i5QiitaDnF>L={2m{`-gNNHgj=-*385tq1z_PvkoKV z8W>0ve?TJy>xp$pJWKhR#283IE9HZKzaKboayq*x|SbH++jP^fQ5iEA&yd zo7sqYvlr9r8d=GP+!u%t()lF_nx!BxSHiyGd#;sbA8Xn|P zEAC#&X80=mOvLH+0a3yol;Nn6Rs=Vm519}}(Z+?dLNuNy`|42v>db6he0Xq`;^722 z9@NaDwhZ_*aRkOCFH<2f1{Y4?$DZJ1OfS1dnLEQ%rTJ&7TkTfxcQzLgp9^yA;icA1 zt+M%S?9$e#Cwy-89EjU!kR)_>gCExOokBPawYU?`Iv@My5cQL@{UW$hVvdNDp36;y zv-yuPOA~>Ghus#ztTmnczg0`NQWl?0hBBt_;5OmSR^P(w6R;4v z8`S%O>i}Cu^L!azZCLKdyDfX%#gyo#JcT%Dly=hPtYXHStc`r9v$r1%ZrC-~X2(D` z#xJ9+U|Tf?wsnB&#Yy3#X5mB%4kPaHr8p@d`9P)m!RSN}7<(w?`W}Lu+90vn5|I&KU+WQU8fuYFA~jMhwQ9}>3=}a;yzx0T&-*z>w5=_Wy-2n zjIPkt^7b{?4wnugUYGJn9iT=F+>p`y*~NgIcm_yWm( z9oQM~5KkDabGP=2U@HzzSNtdM^^8@YYlcH<3GqoiRvLcYdz*Si^%PO870>FV5ITG2 zVY`|pSqfx59pszgQQFxXIh1NF0cEF?5DX27ZDNK%&u=F|>p?3kW&V|B(gGqZffNF7 z*1(s643|x+N%Kg=x+?cz`@$bJ?4pp%o0!2)gtgyu_PThhfo%nB6C$?lw2y1KBV!{w zdY|88o(B8u>dPwAkOV&$fYP#=5xdXb7R&;$#mBc_V>pSq*6F;NvaMM@FASBLWr|#x zWj)wQ9h4A-^q+U;{3=U(wp}Kd#Xzw?3DoAKR@Ktn0_i@TFjZ&Y#Y1W9{e7d~%OvIH z6H_=|NaI@M1-^w4)0*i=vhVU2CK3q$<Kh^=vYeY?1hC1ftqKB6Wc@z zY~3nOy;Q59Z}1RNke+6*kmGgOD>}1hLvc4y+Znm{Vzas9Yg0A30ME9q$i@Z8<^xly zwm)tp>Tw%Ef1^5yG6o6IFGavMt8SV^oQE8d*~zOje0Vm`aHgW zAEg`O)4@4B+7viUl~jOO>Lu29L(bV*?wprBS{&V1Bk)!@iN{`IGMsX1oRGp_Nj)=? zNmdQu1>|tT2LcOJl0khf%ZxsbG0Ls-N3nL;BX*fK8c5+(ADnd*q8@5(HT_SQ48@-P?q?1ruf)%OUA*77xwNcNUNKcbnC+&Axgg3eS>Q$Y?lgPXWZJJ z1M+J3eDgK1<;AsvAbNr8kC(y}uC3Ypzcy_h^6AR8JowIK12nP}s~i@*DkLitULY$! 
ztYhgV#nup zYphlB#&(G7$fN0gsHS;Ft#(v&#{lA@Vy#)7eSQvcYlV80m{fr^TedNDguC%|#O zJ7k72oAW4SF;I&5zUv0O*D>*mb1>cZIM()2ZIIxpd6!nt++8_CYS*@Pv3f8#D}XibmvYTMM*b(u2xw& z-(>PJLna;dl)B?0 zg0#9o)IASIB*FF%l2QdF=H)juBB~d?9N(ElOCVp&_#=nW`G{Sa74y6l`L#iTP~Juz zb#m`SQv+}l?USl|ao32rJrmhc1WPp$A8B450KvEil$G_MK)2pp0ZX_dXEqDxB|~F7 z5&>NNGoSnn@$0}C&_&5F?|2^Z7vT4H^S;(Cc13NM9(yh@&E|ry>3vcG2-ch$C~uy| z{M_Yif*gu`S*%szLJaYYP|WT3VV^;&sO|EPh~P`YZuK(85H$TK&Za-B_BZHmT9^XE z3+!|kO84I}i#h?`-Me)PPAwB0yn7E2NeP6-D?6(=P%7K>X?%sn`7g>07zZ=@@WW^; z)HkkzARy_#R_xL#0dV3H#%aLZ6|S=hc4XtUZzWO84~Jg3CMy-hx<%}j^g_5OzUi>lf*`rK`e7zKvv?5 z3`tkL>+H*9Cmm|eoj%4if#m2wcX3XkX-7Bx*w@aCBP_AQQ3Rq&6?{E+w?yOiD{?Xt zahK0_KuJB#NWui&HjFT3T(qzW(P@DnmYU<~7aUjZ87LT`H-=|YCo-HUjOp-tttl|71o-On9p^J-_VR|y5_hvX zdJkH^4JNk+oFXR3a_WYcg_0JOm z&8f8F3p?Y9VkpE9QSPW%6`d-ba?D1_ECbDwW*uG=yr8QZ;+JElw1sXvnDhd6mx>f+ zRsY3<*iSqpWZXqEjqQv19%^$dzazbWN=0&A#A_R$cxS&qEubOn3Ax2m^Ue_G!R7;M z)TU2P=AKcg0~H3R!I^$LVJ^Eky~bYpv=cP0W2g~0021jPMe>s1wKgw&LLGNlVn2G$ z{pbV(l<}N0)!=39QzYh*(lCXBspU=JH)K~y`ArN~FzUWKA_1GEk5Ef};3j66WY@p} zxv7+sP#Vso=`R58&_3MBH4`xmxp~_y!-z84pdoi8?=MRcHQDkmAFa63nvfyOre9Uy zv4s5Y82}xhibJ*7j7{jyz6B|0GQ2)+NZ;vdR1JV`K&HeE?_z(Ej}OHA*Ulco=11!x zHxVa9*91TLx1}+&*do`l_fa{=`i-OZk5?ca=W^f*Fis6PfyURs;UZK}C&*-0nUFsT z5%_mgz`K4Vj!;S?O8PG~G?5hZ#Sc`tY<(r8JZc5*l~H#AkJBzG>gmx7+Y%?19=*^a z5;1}j3tx+JB0xe?p>>>w(??Fhv&_)7hcaI$?k-hxxo|YFyFbc!KjFd|La4wR7IURA z8g$9Kz%CCXk3iOto^-_VJC8k3`Bblr+O$UZaDN{;DFUDJ)?uQ)D#ETAP9CsP2zSZ+ zMC?QMQBjV3duY`Zk7lTTt8P_aog#=-Wok%G7t?o82=V|L4*WqQ8X^TvaRY|@EP!*m zcP;=(YeEJDEVHA_x~)SJrOpsDO7)60_ehulAt>Cb@G?lSm3w8aX)Pi-VB@!caWkrb zHy%T5ujIFBoD`c3!(qHu*%JdW5GVV_)Qi`zOczTw3DM7rxsgChk%yeD6b5gYCAi$n zASVZPK;`q>~{z=zt6W?z{5r#K7~?{Uufu!g-av4 z6kcvcE0$gd&^`QC(HLP)@lb4Oxi7N4NNn2ow^Jy~*m%03KL*p483|kkI-b#)xuNKa znx8IGY=Ej13&R49b{d>a%4~sQTW0v*qO#J8aPrj`Dd<+Wl8h406jGEOOKPxEpW^o= z0%;(98V7`1DOk0>m@espB~FLbHvba}G>yHNagoyAUr!rzIrUPw6miHpduwCCTxs4k zAjrqj^od;p*emM7Gkt4?)CjU%^a`bkr#iVjK9OZXBMl^bLqN^L1J^y3a2fZ#rJmcu 
zn=Ih6EO+y0gg@^_q5er%Q*j4$-Jr#$(DI8dqHsbV%N_K$0gnHQ6)M9Pq|gJXHgeGK zeHwk$3}r2W>NgEaLBOK{8OpVV1;I(8&K<6M&lS}amKow{d$kx+7Y?vpko=SmzG(oK zIR7rZl$whSO*djq8crDjM`}{Kt`U2(1BVQ`9zd+?&M0jL=b)`JTwv4-E|QI+lJftZ zp|p2HG)(bd4)?w9hCyiXgYB66fze1|wC$H+jr)NRVpEUMI&auR8>H;44=`_CKZU*{ zEt#@WmUhPy-HYV6(-S)ALFaz>OAA~RNEI=N*wIN*EC@9NBzM!)9VqGH9mQa=INGO% z{9j{ZM~Z2ov`b~Y!$})duL9w&(}kj`;*<{XF?Y8e2--Giz}<2ODeklTo6V{=&DZQ_ zs`!izU>K&wQPq8>s?9jgamHT92s>CBt2poM#()4jpjI#TRm`_D+MUyWnAtu}(7mRx z`2gzCFv?nu-|vz=$g+jqfN`JqREOCR{fmA&pT}7$88SLBfcS5_3@uASu6Pnqy zo!-OP#;=IlGg`H?!>rCv@}br^8`fF)gUpif7i0k9UZ&FplN^eJ21MpHlc0>`WW+QP~LX+RZSl&Q;` zTNjq4@3*HA17WX(Hkzrrn_we6Ygdh%=~z^&n$|lLsqgeoYL`BZuF_3dY1U_g-YFFT zrng5H4PQG|?76n4|DWXQcbYQO40Y5#0aeFS%gAXhq|;$^bb19d*&+sbK}?~kN_uH5 z$P0K}cqlle0 zsnsaTVHh%vLI!)%5#tYh-G->%T*xkD#qb0w-B3H{a-9O1Rd@Ih7|nJQ!b_K4Mspyi zeoa2&zmKonKr?A(KvOXb4Fsp#Qm+C&-)EPPwPUUz$-8k;-)t#8b`Q}i%M1JR`lsPG zJ`g^-#I%2OYpMR|ru*r#Vpzb{O%#rOrQ+H_*Io5aO1*~}a+AM~Ta{$=Xr)_;&gs%$ zrDK~;odG>lN+Ygjpo>paX67#KaMCWOXd>gcqQep9Y@5isO!*+>BD44c_E-^H{oVz{ z*1)zM;_dfHJ&B0ncU#S<$;`AFq0iX5uBuQ^OVO@;ALdNCEGLd%ETg}Jd~piG_Mas7 z{N=AZmy0_C%=ewRwe<*vr}i>qmot$5ew?f2_i=P>6Q%MoH$tWNfIYogm*c%Uudn-d zNyRf4!mcE|ql>S#`#^~hHWrS4l=|qrG@_@aJ99V&SvqCo1y(0=Z(q%*t0NfNcoFt< zCWShd-p|M!S*~r=?MTmUd?4Xd>;a^1$(Tl&LFXeI<#+a-q)%U6EG52jT>TpM$nWgZ*%|IhYpf*h?{C z0t)uJKqE{-G$DSqX<*1fxA9#VNmlIr3OM&(eVHF$KifdPFRGs8CF~XRLOYT&kDj|<<{MTj9Sn+h44U|@ z!yOiO%0bRh*QPhQ^(P!VHB@yJ&t){oD6o#oy_69c)@VSd8TV7!OR^hhZ)IuM4clne z`fW`lI?mjQSIebhK5a1H;)})~=T-Gve1fwl;}5Udp|PYfdv*5l#zv__$~Y*;BHn9p zfBOGx)lJK(<1T*KhEp!D*nkTJ2?_x!r+%=4oR_0fKzwdClZuU!SlWCD{eaDxX zFzkdb)%d4pF3~wH?=#DFT|I-*#jEkPkrj_=*NVH=b9%?Wxdo2SdRZ?I_BpPdcBR-o z24InsCXr)r<68C)Az%37-b}?WAar`8V!i$$$rO>2#kOdv6ggG<`EC{T+%IooYZpV9 zpVL0E?#ES{`7o0THo71>n%bPwm0tkPR}tN;UHTUh9(L!ZZT)A$YMu<_Ep<-ql9koY zT1;4z?iDiW?N;@!)fZoUW_xGYRP=fY|>OD*emB>o)l7TE-AilJgz3 z1NVAXkDV*o&Z>K|&v;VwfO&eOen#4Yixo*s7^{%nD*&|E=CnNd`x5lFf-hli1{gQGbB zQJ<^Ofe`a#ad%AK<3;x+Xb8_p*V}{J$)M;B0A1&z2)K_FE(?bi=xs>wWA){YpWyjH 
zJHmI@IcguC{OyEepyWV%@mJnK=Hd3^PC?&c`vpJ`oUCu*L?vLxr~fzcmg@^KClVID2CHPu1+yVV zFAg!=XZWwVe}zaQ>Otwy-h0@9{~P7|1*_(LrnOLy-}BkTKka6PLYMeC=@`s6qDq28 zksai+Q_^bX02Ka>t$3(aRVV8P4tb;MZG_lBVhkcQu|#qMhUf&M7!H6Gx)gzR0VgR+ zPnGATFTdC9CxB4O>JM5;ajBeO{P}9Y@zEKUx2p_?rotCMqw%mcJ*-u1SXIK1l!`q| zXz3t~6VGQ-f&f+V|B`0Dj#K4{)OhH4W8gXoi0ENUB62CA6=cT&`q5aqDQC6S^bRxP z8GxQz%z)RX3c#oFb5_(inJ?+dHoQM2?Ifyok>O3e{krm}mC4r0~`We$tbWQWsg@GN*6nXb31!d+A1U54OWNhtF7K--d(nA z6w|rP1pAXt)!!H@qLRT4kxt+ROK|q13N(ph`rV=CmeZt^NasK~;HG$jJZ0Gf{J-(i zE(S{Eb5ZC?CdJ4Gw{;17H+ldSGgF0B#qMjvwP5YDPU$n)D_nY9xEnxyXHa%GcP|p_c3;i9o4u)yJ->2GtA2$S;=6L+nYnzd zg^r@CgW_lC-gsX8GHg#4F2#~mLb6havj8}O^Pp+fJ;)b993+@IaA!T+|loVh2UtrILSfEGgo2x$j~okn3aCE1O3@v1QyhA zz&_um0b#zai_h+V?>;FK{E|(X^x;1WICuMq+&co70l-4etrVU$Q@Z9?iY|Bg$w@(O zpQSB3&m(yt`(Pr?!lJ5JGr4DU{cns3J%u9&#)!ukWmlKbZXu4%mmrY*{!Mk{QW3OG z_PZ>%+X&uE@d97XOk$MKxo&tOM$RAXit^HZ&$x75IF2~~y=^*gw}9BEl*N(JBF`ZL z9nd;>@TevHM92uiGbQvNG61HriOKpO42~zqXzRIm&nF&ux7SCd@S}e|KEKkA)RV8iD*B317hsbsw}kPq&61 z9-n=W#(`w!9l3}%tcVp4s;5q?|K!9}bO8(;yo>F2pjt^dpVnwFRkNOiXsdVD2(szr zd+GFDiQT{~wwRBv$86srdbQuUd;eberT43is+UzasP*Q)GRO9CG5E#n>KB~i_@j?|HmonW z7t8>aP=VnK3D%0(x>bO80d4QpdC5hjVl~cc1zNsH3)z)#J8D99MOU-O->&qe@eilOpx#G9x8_pLgmJRtOC{esEFzEG~6TubWL^eMclE8oZzO#R~fY zUJQ4BW6(2K4>;Qug$ApqQi1*loQBH360Hq#e_cj^Qfz_h-hpU7&HpK2ryf=GPzVxE z3d?>gu)6raD2Vk5@)kbpP;2U(YfyU(rBI0dSuc4tkr$n*&kCkyZJvZP?UnEcl6eO} z2nD-6>W+=-31()TkrsWJ&^ZnGq-%)`*(6f@6Gs&S?WyFyNZmsU!n@%YgCTEGJAf?_ zP)|tM)Xu9Le%PwVlvw^o3h6xKNb}179TMgo!ExL}yf?+07I3WJ@hlXg$La*-tv0~4BN6@4&>SOg{d zFvOl7zKHjI>2{;KD2Rz;Ea7HZv4#a*I14>3KtJe@RyaJMdC2tjJg34?C0QDRb2`I1 zsA#(KtE`K(Cwdi=dsGUo6+}W$Vr z_==p^TjUl)b&;8=W7B1(IvS5`gPQN-8c+SLD;n5ITp26?+DFJGT6P@b&&c%g|nxLHu$rpyfP;Bd@iNVFDmufiD(xKjk;{t zT*W}x10T5Nk9-aEiJ?wUEO61y4OV+TbPeoh)OURc93aNqW#J6Ms5=`Wy~`GHC$`M8 z4MVctPWd;)9<_bmg@_i7`u41Bc0W3KXhxmlTAJ$sECA*gcI6~kT~CiUhLgt?(2ah3 zxVC)_Kt|D$|2W46&dz*f8n{94Zo>xNtekZauV6?dkI5eGMWAvAo6oP3qz^P$S1-!c zH}N+eo)YEAHCGb4A_C>LT!mYDsI^mJ>6y#XJk~Ov>gqm%fS!~SgS~2=QkOM 
z=FRPK0@kPsj;TN-Jaxn}a1Hv@i0y!jFO`csB`TYW^9rHa6w8VlM9>2afugR-#s*g* z)6v`gY$(9p#I7b(Kds@|79Ih}5jqG>&SHlGp#uxxIB&h_g}@+q?-$_w3_9wRD}kL8 z@(11RU(3_j*&}vSwbk+-s?obALv6@8V)_a>lpi`ink@k;c>!S6Z`fN;3M@Gra^%dY zLJ;@*)wNCwB~JxM_$;gprlFvF@dWEjc%#E6|_A5E2Ce%X@{u7 z0zM|FRk%EUw|UH4K)7}e$2u7XZS;uk->XURarlxoRFV*?>a{5q=C_x*MWI$`{el$* ztAHNEkLE1D%%$tgyW~e*2jYHsp#YP~bf*=_=q*ODtNzsujCnE(Sp~w)YrtPLU7Kra z$$6Nu|ID}?`>7f4F-Ob1teoHVCg=S-R0rIXgivia2bP&Q=<6~_w)BYOlPts-$+csY zEJ%Y6R$QcD8*8udI>D#8nFJE6fSEWOA19KZ#Me1VxKPHQ>RpRqe6&cFyd(Dt}8qRfb(n0Nm1#t)5^1phZM^X)Z`aY(}t^kle2VTZH2ile%m)@qF9{V`hVOO&OML1<46f zFhl(SGT;6r%m&&TmVwg@<0Cwh*vFtE*)Nq|`O!nAl=IxRx^EV|o8wic{M^B#XskSs zH8NaGvcNye;AcnetP6bVcJHU6>9KWeM$2+jsy!Sa3jQwt43+jMhg6h)-E%M#s`E9y znqiy`zOI{szGFYntp*tDwmN8x(APH@?Z(wyzhZ7_fFm7?jUvsBYA6MO%zw*xZqA4~ zlj;}`+>eQ=-jWMrEPopXJ;x>hg0c%}pfjuGymE=FqM(E|?l4Q{Ha%O03A-h#T=%sd z|I8`g&=?+Bcy!z6h8;b<0bTdtpOf%^k9>c>4~`g7-R5MA8dmwW35XBmK2FGAjEB zgkW>ZC|V0x7t$3xWrLuKmVT#}c1&j#Wyoy}f?8t~b@t#wocz1_Z@CTw4!E`}q!ir( za%L5UE=-CN+c6IxFBpunB{EC6u&^(6p>)X}wF< z97GuU6-D}F=wt*$@ER1ZG?!q{cM!HsdKU(31;}2zC-hngia2uAx!Az!z9PgCE~-|p zud+svB?3+Ey#-4Kl7<1Zwr}G<@pAAp!4JA9dr{>>x2VPK+bxTkImy%+qH@B)KweXtZYbg*toIDLCoq+1`Ua1m5gdz4uK$`xtJSm`k$ zKo|}n`V!jGxp*0$3^tHl7K_P;4L+z3%P52pDHl=J-`OC=nM2k;Q~+Y7T^|ITi<5Xn^K31<*)4n})Dte&T1xCBD^&S7uc(UQnpt0_IV~O5i(-`ljc6Ep(`M)ARtSTRu+nbsTZsbWa zvK#l?No<2H6vYs?0yOY6KJ%@sCQsI(VreBl6w&HwWvWL?_9g`T}3L0)yTT zm{jSbEm0QLn*Fojvee4gp>{O>s+62B+2SGj@AACJ0u4bMoR95gub0V%3R8PRf|LFo zs|e!B&vCMq87BY*Nc*ohLUTFvlt_^lZ;_kw8-7IPFCVYu-R5U-#l+x34ACzMo3Hsx zN`jt?P_~Uf_^F{I4yBU}Q5b0MNDWF7N*)oaA}uWzWh9$9PyRHAjL=6-OZV;t$tJJv zh|xQa%-%;o)Hd?dtpF&_g}3Ym#T;pxM>{~}|0hj6G@30w#*j2)+?GC!YZSN%MP|cW zlH-rxliFZLs_hd0TwW{e4(rNxm>|wjv_w8zyiXTJ#R2h-?i3mmpN`oTAyw&v5cmM1 zK`PrpF?n&+F5mS8uxj?TjY+~I-DplW9t+Sv$D8Ki7_=2Yp`RT_%NQ?UN%9D^Sl7GB zc9*}1iYi{;b+R%^G-x7Bf3b($$;8) zsgYFGLEm6!ab89{XUx8H04~0B066ng7o+a_nFmN^5j(|DsSdqII2dfFhs9mU+7I^horvzSC(lxJBI%? 
zz2?M;)q6@vkvzJ-3hJ0>zhoyD$z?wvFi8V6B`2(94Y-cmNS0h4IjIN8BTg(|4k z(*{jh#UwnYEn3#+5@?1Kx6*Q7mS8GQ(HlkmtA3&OI#WnQrAN4vnlQiHb)ojd3(d?o zihzJZK4zWYtgGnnw1QLkaG|(l@w-Q!uM={hMSlY4^cnc3I4@V*FqSC?+wQm;^~D+t zN<=cg9$*QxX)O&H885``SZ*CM8igNo9-ee%3DNh%;-%30co6CXiDG8dGe+c ztb*sNKakDA8t)RQZlS^G!l zWaq3;RiQFrFanu?SzejA_#=eI0Vhi?Y+86~Gu9pXC@XNf&fza^DzhVgk7=Nwimhu| z2XA|Q9L>)X!L(KdVuJ)t7cl;{wQ3fuLyW`_gcj>{ek<0DnEYv4-Mm>otioB~Nm?+) z{g?(PQIaMSdN>mtkS`_cTJCHSqS?3Nm9;QXxQAnj#y>`-)QEJ8U*Tggn^K{v0KXE& zP8I7KVq2J6C0>1Rh`tx|X@~L7b?<67G|*%uG|;FxgF3E*lz61#ndj&V)FH&Sm!ERy z+5p*wHnIv59)?-cw5Xz6rC};=xKiJAnTgy>o=>)W8pelwt+-jdW1`lm?buI~*Bq$2 zYTX1d(<)}ec5JU(C`?h-4ll(4v{wi=E+L~)QgVZ+UundSS3>MJUOBeD6%01`BqtG| zb*^QYj7iZUF&=G+&V6rT*VrG-IsZiB`v5K)GW`2=!Pv3I@tnwe_W3;>9?RJCPq+lZ z0!7olq1OGiR9Y8O!KA;X5lNKTPGK$()FEw(i+g9MCd)QgqZpE^dg|96R&Q1`4NQ=7HG3=u1ds8! zg28yb{*z(`GwP%9o9grq5P0HE;`1-zBJmv(&foKAC&vSVPQhW-2Myl?`Rqv&cSRPk^`+T#lC`eXYKbOu$G zmrylWPXW?GE&ss~01PIZ3xjULD#1s|0ejkr1u@$g^si<+yhkNLdMJ&Oye%uijmmKm*AdzH#{WzP~U0Sr)m{t~HWl z?d5!BjXh5%fQ_;BlHSOGx45;nzZS*Mx*;5jPCeD$W$D7fMO+D>l}3Pz%s6HmX(V;Z zhwJgtnAvd2&=H0^cdJ>7H-6`whzdX@F2&E8IaQ;?sCqh_yvCW(bJBa&FKU(MFx4tYQ})5C5${V<4xwrY8>yQ1*K@hI|lhI)$3TUY^kK zulWK^rzC=YWuG3-|BNGF$q@e`_=jZY&hdaD$pmd(%<_Vy0G^JJ20Lc{_{q*$*ZhLje)j)A#6}Z*9x>C8({E0G$^X;p!#9 zYE7Xf)EY#yd%bWOa{c^-rwZtjJ4&(Q0n(6ZdQL5nMYk>e0pCAp^&p#0rUA$}kEWnl zNb)w^U^2_l(E>{P_8SiyB+a z1Db0Pk&^ZEsu3U1xEa?A8O{z1>qA>?frVKH=22Z(c~p&V@Tk1c#w;4>*V+z?uuA>$ z*ADDjr88$AAm7$Y@sB5kF$>8w5QPGNh~eunk`=%bLeB+>N!u`1>sbct`O;~pP(!?- zj%elTji>z3DpYk8BLkZ6%;0P9ucd&sQP*%GqHN99*MxSoueqc8|mC7jskF!uqGlgA;7zNBK(z32=WIzc$8k(5H%u#(Bt(F@%R9R z4m?^@4MtV%aXX*?F={aGPl?gafhqZ*F$7ESkpzv7P?0_fVpDNWVv)ew_tXRKk7A}b zW1;wJG)(petsOKtE(0im4>fC81ApB3e$e%7X<=JGjwt67aKDCQ0h>6pgfa@G5D)uLVzICm zkz5JSR9aAZ;N1Bd$X#uULR*v~*Zfd|S8(6(moT9Eb`+518USZNn7_Se4t4!aKrc?v zLNz!}AWM8YlM?{Snuhm7Zr3sXDzs*rnSxap*nhirH4k7cDMi_QFlYx?PVDm{it3!J z1j!3F$k-Xq`%qKMyc62M{nUZ}4&6*3`3Rxdi0H2ByX*OKP}wVQ;^olNG$E^sV5%!Q 
zta}SozXO^rpb)UH{szfr(Ps@MfQlnaSM65$N?N(TLB=#>l%4%q_kVBdZeBtAn3D5g zcCujK1J?7{+`W1f#Ukf1lK9z4_N}O%x#V?=a0B4c-e%TLI*LQspJi#zg#!bzE4e5> zOVB+CB(-($u@n2LkPazmTP02T#Mntc*R7MVDG1vS1$cHU+%U8?55vt1D?x3#M0n*2 zXja)y{=M|C`D@XsN!$|Qikm?*_-b2e$J&n?dn~5PbtG;~vbS@HbC^Afu)sZlYogi7Gl=%7G`xjJLm8t-tD(v z83S<@Bm#sGzphuF1#qKlzyeLL2;5~n%AKh|U84No2$6~#1+UYOo`QLbGvf4dhr6_< zxNF16!kY^#ht!MO$%s-Utcj5FMIz#W%)%7(SdExdmFeJhh|?N{#%{&7X*RuW8=mAU z6nFxuYzr$bEL=iD&9X_llKTyD_=jPET||JME_AG!u>@Nv9sH(0&iMYuH^^P;sKder zm6;e4lqeY?WnkJ~nNOdo_CpIAXAoK0zf$$9LgVB6h-goIB1xg$tu`G9&!uz?uVWSh zz1^|_S%1;fM%8*3tLn>%d=IMoJZ zp&l57419BpSYyZ!paU(m**t};-0pcn71st~!#_sRDAyrykwU3B z=n-LkqS#hSBcw2Y_D$(ws7MXDz64V-_~K9!9fH?$D+Tq%!P6Q!OkP==Tp8f^^SF z`b#*L!wNgq1HoEdP2;E>nwu?ZT=zaintL`gZgoU$WzH&noCB_re@fd^aeivH&0r2`NG^~@?zhC(GwRcI;mf>)< zp=sWB`rX$C3B6cRiTXm@Otbq==>@FeeMgmMXlj^-kqlc()C~C>EdaMYlYU(94I2qm zRHa$I;o1T!=`qEl_bF}f#Q)ymhk9ANp_+P$^C)CY2+0n8($Kj;*mti)WsP$}kE_g$ z2=9^uQ3aN_9biG;r(F2bBtCc1@5;|6!6E0rw- zzB2^gL=gtOPd-Nny0e-Wq6exNJ7oZ7Z=Wz!W{lW0498Wt&1Bcv*TZmjkG;+}dRXju6Yvcp64{V)* zr387-J-5%M1uZ(;2%yeKR*7j+=x#3j9EU-I;-}#Dx^)e#njV=iMi3Tog|l!sxXOYI zI~(p0El0IVkaJ`>=!TSzAz9^J^;8=If~Q0;>`s<4Mu=2{7*;w#h={Q2y|d73F-z)L zP&`z^*TANQ6O&8Kmbk3 zG5o)aajz$|<-(v0aF|lJA0=tZxek2Zegx5JD~qzUj}jfK?-k_h_3We#6O_@y&+9F{_5>h znh^a;0;%yyX()wD0hAAVrQ8SHb!egn(xCvP?W$+m`whP$ZdxwGy`JmSx$CocitQD8 zz>~#{WkMTfrAAl{pr-d0BUgvzEV=FnKEBeTPafM7sy1qfwPKDAqwKEY-`kfcD6iPE zxh{CD-pT{4Y~6M}%F9-B$^a^aIEryZi%X^?l@B0uyq(E208f1=$&P>o1`1-vAVXA4 ztZ>Z%Dnyrf)I_4FStRGZAEy;Hiq20RMUCIQih*G$!o~Ln5bG{3%91Bg%5BjT@Oe3< zI&9^iuW&%$|F9#5Fo**re(Ru8iJ|A3Yen&ba9)f09-oWuDUtZ(pWSU=_R-(vJ*KD! 
zc;cav^>*6CR)>M)t)}>Rw=5RTLP zY^$XP)xH=~sQ5f+Duvo!ydg&ktH~SY6^LZDkME6S%kQA3@U@dY5+H^ofJL9ec8&b| zkF?{AaawN?2^0kXH_#WVyr*@GkyUz~*aTlGT5416(<8|Ujlq_s1KAiC;wmrxf`FgH zz+CqDdqow76E!JkEVDH2lL`uGit2-m47JjK3~;Q4M!NqWh@~viws00R;zh(3N(1W| z1%uBBvt%uXbv@*i@tDhqRYF(i@v`)+2fFxP-^c~pA2xTxv7M3RJj%tF{uGZFgU(Qc zjbPTzYh^tbD)>w-*hNPGc;iNYE}p-}(mRps_7cci(uqL)051e8(f%!R`s|!km#IS@6N@G1Fh|Ce0<{s= zqCX=ui+kK}MgRXvq6Oz?Vj}TvH%ytYosoYH_yl z1U;+@MdEFi+i>W3Q=lAM&3pbbC)A#N+AB(N^l|rz{=DAUWx6r4B2up`UaS2f&*)%u znF}?#NFq}|!xlcs`=CxX3jB~G}ixt0udNPN;PFFA$ z8b^Vx8OGlm0x_OhR)u+s0A6xAlMin&J>i5xSu0&^mpoCKc>)Ix4$2AAunFVCh^MV~ z%VVMO>?g2#W=;aGJ2Q^f<*vOb^rn)3>yI$dsgE@?S-^xHdzCL-%x@7jyaXQ)AJ1sD zpyhXlg&Arb$rb^9E~Z$!9x2a~=hx3&6@!f=s4|B0MgN}`Y&nM z$KECNnH$;8lfs)ATlWw4!!C!3Qz|m>x+9oWo-6BnA9%Rq9II?Bbf_LTg&Emn)ND*- zU^o+NpKD@MHsTC>S+&Fw8a4b9qNn#Qq^hA`&8%t%8 z+#^Q9ZOxK*H$v}=Pi&aS@?b?IS(7QoHYWPQ0F4Jl8ukQMmte^f2*7Z;QadS(K-F9V zn2_*VCisZqkgmh_@lg>plIHEG(Q2x`S9XG}kp|(PI|#D10KF403JZbWss!;t=PzJ$ zN&o;oj#ZW^3Tj68avi_O={SA<-q zFW|C*vJm%uRW#@x$~9CS>a*ygzfpfNu+l2=P5Q({(Or(D# zZ!P5xUkzx7$E zUUJg;jdO^*b&4=y?Fr7cdkwbBz!w0{R1X#{K|Otj5*W=XsBKrTU3!PxZ8 zQA*!Rh#<%ej+tlEKN^qA&d=loeSNkty5Rf?+eMis!XJ*`o^3s@&3$!-v9s(CgmAWZ$z^NYgzH_?%L} z2ZadmptvxD7x<8(-ox;!p$g=ZD9N@jhcz3k%P29)vIuX z2Y%cP*9*(AYX9=tUt*LCqyHe2uQpPNdu}DC#}mP0v0Tpfg{w%LKdvQ*_6ap5zSKkBdm*HT@rt#jy=$)q)S* zLjDG8=3ccC6>LStd6$e>GHpxvqCz5ZP2iyiomG%2Hp2vTeh}yJfJf^{2muy{zg8TX zqFh)$InCu_Y_-^?@Ceacz(ryS@<<4i!y^o4(|lkh7vC*M7%(9YJ=I9rAXLWUvpoic zvN$KTU_~m7ElMr^!mCel3+RU9v&RbiwzS(A^t{c7EZC6#sy~VbMI}pGX3-tm6W7+T z^W`k`0o_{weIOdP!?h+(cj(9PO|n5kkc~*fdc*2iP+`q9S3tfcGnJ>P@wwE1AVoP_ z`(e%drkIwFXOhNpTC>}_Q6&4>4i)SU7{dL+&{jfqQLzXF7Yg#mz9_1{7lpveN}*0~ zdk?IB*)>|d7djxdxFT${1m$zr69N1=FxZHebZ$jJUR`ga(*Wb{UV9Q!pB2vhSS&gv z4wshnQ3FZgsncK{3W-C2_N*3n2vwtbw=0PN9;+PktS%FNpIiYP5+{Dgw2} ziQICg^On6Q-RjA_$IB@BuzZi1ED>33fNFEH!b_=Y^Ufhe$Hc%*e(cq88^{y#>zguD zC0kCDs3HW)nyx6agTL6L@$ zC%YHwUs+bt(GMQa9giloEYSQ-zAK1~DN*MksNPxs=hQ?RS-GsZy|7i<+@)`6R0fM7 
zK%khoEm$Q^RIaWg@2bGh^YQA9Or~~;JuPat2=-LhUQd&`+Fz*fcVEUgt~yY*6Jglt zPN!6p*y>p5v(Po^Kh%{`5CEbeTc8jH({-mbkk#yetF$h)0<}++X6IALs;L=tlyS1S ze&*xHeX+&rYOw}^N^@os)~Dc{5jMTIz&Nx1U7#uz?O;>d%jDZUiboVOnoW7Fy-{IM z2SQaQ-Oe`g!NMOhcF=114S;k*&cheU(OBznoLHRh?s|P>Hw4%?q*)xvW>^=$=URL! z;{U2}Lw>YTl{f~W_qEYYBxqDF`f{%ltdUX}xicdHMMH-KJ-X%rQ4c+G?ED(6&Pemy z?Wi)k2v|P_yr+plp20ZwxrLwpySinO*3Jz=c+{yi45cmUL~%?b_Yy2L7N@=0ZMQjr zI_NhHkRvKjdQAO23obwl#&T|cT7IwqVs`pTEz=5thpvp0A$jILyEZB*{$4J4-R&wA zKHbKL{$ig7f~80fB}p#1t>{QDmFwd*yJLce^1nN6PZsmdO9Sf+nQ^%`ag?(F<02|0 zTA_z26t641;CkS!Z%^w!>FJr7jl1b1NEENxhWLvWz@x4I9&j+`cJ2-C15BsdrIv1v z5RkCyaHp%&Y5_sJju0tc`;N3W%PhiDV;7`JN5X%(o4oJ5-E#%qpmzjpJ-x?71D!4z zqB=6B1bvq^wR-B#Xz*0rbbY8z!)@$`*o0uhGxe=<~kJW+nnp98(-( z1ly_h!jK*_>ky%F(j_RDJf@Mws1yV%u++EV&(O1)OEFU42lf z#42PNsCZ`!^xM-Ycf`fIuE8eO?ry$T(qe;4!Ag)s=o6HY%iI$s@o}0xvt7Oxawg+& zfUXWiOkXUF+`c3l>aA_3I_SqJJS>)h#1To%P$=Xklp*@=VpYx^tTXt}QhS7ds+l z#Q?VND5oWv>QXx2l%`LZrH}yKDZqMeO&Ox?rNI`Ky4Z<;D;i4HQ)-7~yZ_Ox@MQyU zo^;rLvdC{zQ@E6ijkirT3Exa-!2E~-E$4QPv^C;A18Yqg!Gk5@s78Pa*uf(vEcR3x z;q-!I?W8io0Sy`Gg=O~tCAudwc~Q$)&A(vn@%I#ecfg7sVKC@jJs4POtD8c#xv0>5=oEVj>qo9BEa{VZ9M&Y zeD^$1vwHZ0(#hLt6Gj6TTjvN>&`?4vJ=N*0S9FDrf{!+C(hog?Ni+uUJ9BbH3#E2m zZR&WXww6hY11a~g7U?o1DOLEM{Vb86ln?v$HL#I-!@S~K z(~+xqhk$!LA~lV{H=(hdiGy;F#28k30>r9LLc#aeM>9bs8bQ4mo77*g# zD1a|=B-R8IW%Oi&oyxpM_M??S0m`E0rfrXCn)^pj9knP>Gp!!Uf3vq;$~3 ze;FOj_#c8y(k>X2iYl93?L5f#q^?_79D~6LrWLgk&^_#dZAoC6<*cP8S<%uWI$wuX6`Sb!0NYC@$_Dt#xeFjJ=Tbq!)c)Y z8uR&vX9~w}dK$63I(E-fXOY0#O74C!{ecZ5h8b`owmNHo+c;Ti$hs%7MtFE&K9W5!4?aR_A^J-si3J|F93IeByEKIPL&#AzNU#L>7M?xLoip6s zATA*mBChOA=RZt2#w8~VDjJs=k|VybOOX>=%Ukc4foZ>f^v81K;8F#cDcCo>!V6Xa zKoB|z*+%cUJwVrLUO2a2}mltB_hqFVZuj!VSr>kE9%f^Z5mR^YV<;%3@^O zLR2{Gr1=GNeVH4O>*X=P|cUYmy`6&)n>NiK0)n5`E$q(|sB zeKZ>a$xY9gMfHNz_`2~H0V$J5% zBz*S*Hg&p~TEA_Sl!$GL!~3(2#}<48n#KfUoWj{LBwi2tWw4c_wCXSUW=Z<9K0bTT zWBLR-8Z$N+V1guRJ@d7`zHBRP)F2;u_V3nX-@pMyn>ekWB%F=9@$(zMyHap?5Nl1s znJ_{6!{hZaIPzfU1hn45Sap^Zsdg;!o4t1qQF~}R-T1)=W7rCnhJpCZwKS;u(hIqS 
zhuQsXD@~pEk<^)u#ZSyg<39RJIlpBPXqAw~ytxqY0#59~ocpkKKQn7I-8Z_t=04@k zyByA4S^2pHQp7zwEU>}YS}@HVs+XiqiqPSyLT07-JT3(q(_q5Gk^_b~ywpku*R#3! zfEJ3Rh{={DsR%kJOFr?;49g0O{e zMda#BV-636)$x`)KLUpkPmElWG4dlN8XxclNpq+Pw$n1CwR`9&Uo})lgwaRpinSGZ z2?fhRV^&1b{g@fch^ zMfW^1_te-B@l<=r8VPl#K2eT5bg>0gl1y@LHi}oe@Q>>TVanpg&YYJ%P`Wo5 zyIVi=_E8NWWM=qM3vZPK@J#5R&Pmyb^`Ck5cj4~sz>BH5q7plUa9$XM%?zS4DcuES zkSROD67pR8%vW?(a8ON*NJnc;3E>L;wpU3=qo^`rbg<6G=808o5q^clYc%-uPbJMC z{mtCrh5^1%nYavPC#w^o)s#CWLKQOk5Xp0Ftgf-_hddxaTF~JRh3LRXPPXbz!%Gs% ziKq@V66m+tR=l4U=OFEdV5u>}iKD9^y`!v!UY9~2y0|HPKg~!B|0tbZX$Q>=opOtf zigA#cOKYvHz|xR^J1a9I9Q8(#Q*^}IJQhd#OL^dBj2kt z!iP+&xuSV|n9h5MGakc1e$>t3rgYUJU=J&mfaT&_M!6S}m3zW@v}?$`e1@4!3IoLH z%}cgFu@4lPzT-)Jo{{&IQ zT8-6%BDqOkG1yO~>?$S#0D*pBc)*n=LK>Hf?<|8LP1wLnMheRO`N0(7q+@F2nEcg( zrPqx6WJM7SS_Nl=pD&e=x{@sbyFxd*rRpCqzi^XRlNN0iFdPsG*FvY_0*MpBs-#?H zOC7wQQ6uFW1H}a+OE69aT_)Q8%(1EI@3g(3j1Wq1J^jI$h2y%$cyZz&Snr7G7 zQXS2?qe;i3nUhB&jYnz1bs6Huz}>1*y@o|%cVseoK$Gd?O1y0i+3u1Szq(W7q}V`R z{5ZB+JtzrQvl>U6hY+dWSyvop2ma;)SA9S@3Tj2iHo0kG~iQeCtrzeq53v(Wa$Hv_A9wd>WlFzW%#we9SlwuK@i3EKIm& zviF5$_Yyw9X#)XmIKn!2&LEh@Cas=0dN#jlBmKfbW+|X6fj%-KzMgnT;KmJ?pUOKO z#s03%=eh_$$r6bf+!z-fi`Cs5EJGI%M~gMK<<N>)`prMkU3ijto%FXMw`7#uxt?^ zgEYU}Rws((I&%=#B>`{hBDCko?jXAnYpmC3ogUXcmf8=;+ZQO7HKPmABs4nh3pklC z*I@_{(J0a!DxIW3O8!2&sS}ZlEx2LJ$3|T3&JF;4Md2hhhIS z=p<^mQA=fVW_kb0lTOOFNra&zDIN+kusM$P!#mtssJe6she&a?ZhL_I*GTvvF`;seBxW)D5 z4DLnD2}jG zxy(t{;?+slri^4Q*!!OP0ZlYYFK;)#-iXw7rJ-+-eya0814^Z!pG9Uzrg=0_>7BNK zqhY>k76frjb}}#1+gVDD?p)`*{hWP|l7roYX*Wz!qDFvRoibZ?2+!ipVv0eeQl0Oe zACfl?5^!|MG5K*nBUs0YmLc|djPawnzMz>B{e%hl*I_bJC3{2>JXC?UShx`wW0NBo z^iu(4vjMqM3D|YyK{sotjOiC!yDsq+syaP>Qky&xOc<48odbOrHj;RZVfK{Gf<@02 zk!d|>BsO^9lc;6l8$V=+PVv)%YD$^Q9Dtd(2bop?|AZ+;N3zBz+a)FzNwtt!Go~qL zq>t-VITRkBR~u5UmSbYGcjQf&Ez-!~5}A*H2<5xw1>h-2dBC#qs&0zDVWYR03iGvc zHEh|tz8=brKCcUoGCc1mos0+jT1bR=Y)e@6nL%y9ar>$Wg_rNYy2m#_0A(l%Lh2l2@3zomy{4}Drd1?78kF6t>M@^* zNS?1syK+A$I2iEA)2-DH2zNj&L|vWqoCG)}2j3(I4o@TEzcxZAb9&w`XqLJbrjL}; 
z;a0iGNGZfo6gR^}!&;$~>)&RJ%4r7UK}p$&@w z&gURBT7>mW?9x4)0qcHgfiOlN(z-Voi&zagM1`twtO`1u=eFA3A-SRvm8FT{S{EuU zgO?bMcE}wi;v=Jlf-qX>Zd3M_v2_9nQw59&&XUan!$3{@{L1?(U-WWyVu;dFf`C@- z%6JJKVD7)!gH8!Ac5iq1D+KR3JiquVh7hQ++dg6o`EEW@CgHqCOQ=lv4YM5lW9?;G z`bb2=YcB1FZfEaD_w zu3@&iB?M2h8j*b(u1x4r)fSe^Ls*__DGpzQp-Yi0aq~qgm+5%XjPFubz<@JF{RtZ) zsJbIdw4iF3yx8dY=QCH+E_qI#U8FGeiD-2ylA}?NJL$S?z*EgbgY4H1(CqXYlPK!z^#XcvM-{tA>c=Gyz3vXqh6fZ=!mVjq8!&>W_w~5c-$ohOnOMD zl)dQ+Vr#9DdD=%c`I}e}*li0Zo_aEmD4>_pdfVAJY=a;Y9O>b?8-QGRsbJgH+Ex4LJ*^M zZ{m)l0_fv-0K2LDzi z7yQHL&Nam2j$A-(YM6%ADdWj>m;Wkov#P7FEeAgj7?b~1c^54byB^tZ%oKnsd>bP^K}+#SE>iWVQjfYQ*x;FN3cgHf=4&d8xbr(}Dz ziut!ozLy8&Y=x183ATr>z6Fb!TUerBvs|}<@9J{D&~Po*mgw(Y;soGY*s&mupZP2! zxT#DS&p+@mv|TG%0c1f1Zo~we4Mjjl;>_8#+_kqKTQs$3}X(v4U$zD|3NEj ztidVUqfORG9k$xsLoAvMe)uRKjQ(1!^(_=zLS=N<>I1zeoBYVcvaG~YNn!K$DyvV? zpz35z@v=?ltknl{vCelX6|x#1YYaXbS*`3Lkt+j38}-R5eToWg6>CqrWVJR}S@g(i zh5vx8QfRlm*p@*)`R4P->a*K!EX7w@AIQ}f;my#RMOGQSm{VPf!VauHP;`#bk5nQU z!b8Jt#s0tRG7l%WYlM+5A-0d^eHM>QT}s z;XfoW`R)x%sb;MXrG)Vx5M6OLI6^k7KMDs<(g%tZR`!q!6>Ib+=>xqOS^Y?5N*AM} zjn?RuLH7QEMLR0nJ9yK%=(AD@zhe2wM$oS{myif`qbf@GJVweKd~d#49Y*>n@>9FEz$?7u0r<=JoH0rtU8PIf!eH({iGs_ z?a9iVS4drZ1{Oy(97?|#ow~@}QyM6H-}DJ1eTWBj)Fq^B$`<8>^vS>IZB-WX@KFb! 
zfje0}(P*R9pOg)@=A1i7Cv9~`Z;(C^rFF`eR8}LTwc34gBUQThq}Nv~ga442;DnMn zeYCotcucrz)3F+T&MO~%?;lvKY^}>jm4;-`<72gc1mv<3a5jgJvTa{&4ElRC&VRsK zrPJM`4;$Aq<@?Ycee)%iNJbXhi_RXc?Io29jj;?S_4;U?ABkjr@6GDe(b}G(!lqA0 zp{!i&Ao31MmySLVWbLsS9itu{jqxSYMfyPUwaVT?A*XGKjY)q|##g6v=OBz|q_uf- zRLYO2RF=wII?{oVIct>u1IV1$Mq&F05E}NVQwjK3rL@YYG)%TKU!2Zmqt_+_gCkFA z^VsN|@8Gf$yQ;L-5+7%U4k;7rCZp6}qYosZl=CHKtgg`qnzC7{V`rleR41*k)#j|x zIvZ=vE2BNC1V-Af{!3ni1!C8N##gF?p`o$((I;=Mb< z>9Elm|H0zgd(|IvP`<=MUCm~6$LNcUve>*a`aqRc#vXE+wxuX7@sCAkj6M*9EYhDm zM2txr;Wg^DPR1gXj?qS$`-sY{m2Dkdlv28^^MRavlYPaKafW5Hr5@|7FVa>aeFh$t zM){M9k1P#|yE^B5i^X+5P<;@(38%9<RGIw(}_z@7uUSw`cZ!=anWpy^^ zYcXRC47?qxoDW3XeC{6dJWcQDc>Y?C+7n(M_Htm(dtds${=$lCxdU9bZVsTAtE-eylc*BoDqJ6 zR>q>wIOltah;1(h`F5@ht1jbwAST;%j=CCSoG+=gn_(%2mybb*amx7=4lV`yi__6K zA4tv^D@If+eXTKXaXR>u3d>S_)UlP9F2?9APUqfRDv+^`Y+m7P>?afPRZ1tT|DaIF zoKHBZ{E3LQ_bQDpqik`0mcS7Y0UXY)a?ZqoS*px9uD``&cwXk(MIheRSsp>s7l zmu^D%lL=R=M>iixI_jjG7Z~qzlPo%IGrpfxa0bRQm~Fo1 z=$zDLm9jmBgUc{!_1NYEMF^>r*isDtWUJ6&n=G?0hR;}oV$WZj5&lEOM{v4p^MRzq zBJbAEd27?Pe^9aQ-LRU=$2Y69HXo?LDdjWpc#cZ1t<4AFQjaz%Wlq_w@+p&P+lrTt z`egHgTyubuP3qo3#nouYN0QMtrSmvpdj^&ZV_7%n%kZ(b>5$C_f~-y#{jvE#G1f^j z^=PfSWAlNkdl3EuMrJDvdSml}Sey`Iv{zem#-@{w(Fwgm$LK#Qa<=9gbXw&D)hAnf zOJ!D{RX)&+jlOr_8g(h5owjE1k(^aNP^=Wzw@f1Hu*wQwVu_@-9-~b9t1`-uR5k^R z5_v;YJblzjca_0;sjI35Wwk#`%{vp+hPkNH+JP#b}$f`lIq8<*7l?~8f%1%$|xUl zp^!D_bxPPOrOr9{rhK3n9dj0wEoN{My4a@ADQS}rPM1?Y(38{6$EL?A-F%5y;e-s{ zOKW)A_M!Tll44N1uLg_N-JH?F+N8HBt8{HTo3c9k+GEanm5jawGTNY&F+RjXnUp>y zY)xa~&9yG2baZ1ib~}8V&ZCs<{R0T;prg#ag<|qiRp0bCDP$k9U?(MGZ<$1oI*IR4 z6lQqp9`q(ztpAXRTHSZj55Bhgm^0~WeMr{G>2*qKaf=dK9GOv0YO}?ajPk+1K;(Qy ze#R)Dd`JXS*PD(x=a5!7pVA;%^B3i-KY4($PUhT2=^W&%q}1D^!d`vOqI@98Abqql z>J>ux4~I+nNXAK>Q^*?oM{_YgSF_zr*!&!vax?)i71BEl)o8Qxx+p0swv#QzLl~Vs`9N_- z$zA~&OX$@}$KZ4Ff#h?~{lmd*tod~Eff#hL-i&=QI98p6^CuOU`f9e5e=ItOPQLh; zeLIzpSfvj7bMk?ltFQipLT5)p=g!GiD})U;*WCLD5L!MEZ7sHE;8B^^2Az9}OJy>? 
z&=z0&h$Uw|+k{O**W`oi&eKPq# z)ivih8uGI-mrOSI4k{wb;@!{>|6+a7V{=aaqt+@bb#k`%k;}$v_7(Z4&DVL&ACnI> z>x=Ux4N~@EtV})-s}HiLOz4~~zUGa|2WoT9_75_wakPevIb-sH9EDT^ujm6NgO z)l!+R!s)ceI{z_eq;O6NYrY#1AIVu=N-A%*OEo5|$I2@G2uPHWN{4mI`YLQvzNPZW zu#_sNyBe8$h=o!rr?(m<`$v`PkR#bnz!*P?KXSRN6}L%~s9>e?0=PJ+(+W-@qN z7MX3`Yb$U}5d1_$LuDeOVL~E7+3B%ZxRt%ImYp)*8Y80ag{9tp0{Kedt!DrL7I;Z| zCLj*SM-EBlVR@iE1SIMsJ9Al%H| z58-yQ)s)ql&)9DMX8X6oa5lKg_ABhzXv~5Fn+yUJu#@8**lsci$QN&{og4~{0Fr`+ zPI%yfrlB7K#=y9$$#OLG|KrePFd#sH0Kqz5LyZ`z7ho*8jDxeew45rKuLUnvd?? zpDpI6Yae6sl7zpqKizF9%$@#j`~R?o1ztfxCTy$0BuMbU1993Y5SyEG%JvK1;ehZ7 z9)jIW(?;G6ipwKT8wo>8l9jf`kQSJ-!nQz_HNKf_H5dpt17oe~-s-c))^&lmRpIG3 zTVvpSS3LvgT0T^oz3zDe4Wn1%myoHlRYx|x@x ze{ETC2r%7SMJ5rCq5ozu*s5^8lSN$)2o`lgxSIb*TNV(mCfnE6Z!qRg7Q)r+tC6QG z@sjjB3eQ4bxYok|^C&!2r9fVwss=Wm|H{4sCn3S{j%d0I8x1t@7}(Mo^OuJsGr%E< z5;7TUk&yrL*c!tkld%h!Psn5}xlknk{}#yuh2-*p{99ZL^W=S5)cp;nD*O#J@EG_d z>4_8`g@j5(#PXQ<2MquJSWGtpekzA`skf?)A1EdjjQtbEYR-KvjNMj)dFGh=1qmKL z`~?Z#xhj6ufI>n8|2Jc+1}{lf^%aNn9E@?c7Z`)VbS#Yd1#j(Jd+%S0dTPRYVK11l zUfBO&d_-A|1&jm<D4n6lG_r7WDOo^t{C zdsWrs??o^naghA~&A`;_pY-Oh#@dd%@snTJ&{XwW;qBiF3B;qJ0g+A6Sebk&i-fH; zg|UDKmdD%wC!u5amI;qdAc4O;9;|ZW3B|OFD0pVz? 
zvas@m#6dz~nOGSsBy8v$B}m|COlnMGtTxBZIsySGtFaZNYjG_*St1yCNo7KDEhOnt zh^%pN1PcykAR=)%A_XdH3Ds>;f&>bU018c#FF}EXg#SvXjPSX}7{4h@nBgyG>x*j9CMy|x|;!nO9zTr1-?U+@-XtJhRkqyO#( z78IHR3JDg@7$a-U0$FFfzyb>xSz{C?CK1WR2qOvKUQG z8Dn5%jS+y(k1veUL385n!9ujVWK3JD4*lm?b-U)-vJ zV}Yf!rLzSFw%A;ag~682-=U^_@y2-j73N&4dIgP|q{mN=6j4|1ZDPM1KVDi44-`BG4#lBW`Q5b~^fr7$>bpiyt`G|rb zfwCIbtv=zO1q&Vp$Aq;bqELW@B0+)#i^{^fnF(vlPMqr{NrAHaT9~r9mcATt`~wgC z1BnQTL!-j{l_W`05IGhy8FM%|n)tG zk3=57z*9^tlTXm_C?rVWc*_KVMTX;e3B-i*6cLsQYf1Wqw@el=3l=iQv*0JJ7cg&; z;Vdlpg^Yp08RHkcLc+cqntDsb|C2HPvHvV!@W$o@2^Q@C<9LTdA;H2KJ3e2#LtWSl zy*YpB2aaFx4wlKK!7o_(R(Er%tD$ftOg93ysIaB8!t+n$xHPcH_bn_;f`z%(owCNz zIFit@Ncaa5h|gq8MHv{GY^nbz;ZfkgU<~|^hs5OJug1FgAA=o#nLHN%{1aLHKhfWf zwV*bZWEdo!EMPGU)?N$K-7Iu&#)YLU=JE@C zXI~ACsc=LxA$h2fSUwVYSP-tI;nY6ds3lH?Ag)u`11R zeB{_%6$e{v?$(s!9UnQey}%Bu+8D#}k%Pi^bGMbRY&jd;V+MGl_W(`=Vn-Rw%y>(`G2B{!oavR zkAg>DfX{cm)(_Y$>n>w%`(e zfdb2F3gfc^CV-!aX!z>x|K}ft=c}izBs~dqC=8vzqd?*Ri4r7mI4T4V2i{g=gRN`X zNr8fwG(0Yr1`~=0CBnh~Lpe}oI20#hkvs|%C_ItlUx9+)6?lbD*t`Po5&*Z zD|jNuLIMa9K#;&g951;{9uxonk45r8@e0vDk7eQ@VR(%#yZ@i)|4FdG;C(k&w#eGz zR#<0?X|lJ%+H!e{2n*->Hr=YTZN}4%cu9Kp2KddDEz5qy^7wxiJPZFY11MNF;V+Np z|A{hp|AeW#|DOa4{FPN<2=_6P1``xty(B%7^gI(1hzJLk$x9j@mwyG0th042xEq@- z2=^rul81`Va4ZN{0GNDX2;jLq{_^}p#4k`uTaqs90{Jem(U=9w0SR-heJ?r~Nung+ z5&&vrX%-~PfocKF0u@G271kG(iMMqAyX|+b1_OV-8uuG;I2KSyE)Cp^rzrb(YmB?^ zLbcJ=-wE%lp*wc-*Jt|)stkNya%5pjve=oku4K&ceZ}i6KP+9-o_$k|$q^G(z8-}OC zmA#NU7t*aP=Iv+w8=AToKV2)^ow=Eb^O6)nA#r$I8vIjP%r7h~yd+KjBB3xmD4s%j zD1b~T3=Jf{5;4(WiFgX-@%S?tYphPTucv*hD>ryadaP>WhRaLRBR!9Tth0UI!obsX zab8=0FJ4=3WxMfrLy{i+4J`NuuD@C9oGs=ntf6>GdLBu76khYJIY|7iE{nMrpYX51 zza%{gzPcEb?_=T10S5fSr7*@3-zdxcJs5qbTPZ;5Tt?#P=pzYt3P0?frvg=8Q8gPalKMUxo1%KyfebMdz;8=$LfgYJDI%U5jto z7qc&;&Z?^qO4ph4oz*Ly%~JP@z^ZG~DXnq7B-U7|d&=a#8l%fe8+!_fyBSzwM&^;$ z+8!cezgZ`9NUNh!YyN1Z?=6#y)#r}ZC##GzIQrpXj8l4}71sJDb4II$Z$jvGRtNt< zVT`c0_LB9uD&2(Cd6d;XB+`NH zQTLAvjj{eDr0p-2yRq(MeV|C2eE*IvBmh z`l@sm>jS;WghlEV)(47=LE53NMPW4UWVcmGURK&NR^lU+wYsG7x$5XgCe7K59X?Vx 
zt3z0y`v;J^HvPdm+dHt>?O=uS9}>_p2E9Qk8T*IC%~~_OW3bBi5D}9;N8w~%Uxh7d ztzDYRvFY^HDNe?GzWP8g!uUFy8J~PEW267T65-~{5?_6wC+BnTLFe$*2Xf6p_mhh1 z!ZA@*j-NI@_!D zfdUq$TooRt!}&lTb#A_Q1J!WneC=?E7|6YlI_-&2Z>7gKvLFKz8ZUL3SHSZ zXKaN-i8mgkX={whZU*T_u$w~4Ha<|t(g>kY-et7%y&7z#FH+W)bzL+V>Q?-eZRFwu zWwNG7982XZB4xYT_raM4B2}4O;R8+CM$%OwMHopJckqFvN$ZEm8I&sR^!a>Kwvn~% z#r5D)QPYZDo78Dx|8?6=||4FICsF4pM}>tp@e@ zNF$Rk{lHsu*6IUQSzq?04^&B0K9Ho5@c2O0wwH5}puY#=TzWfvzEyQ?Dm&rsfiQXU zx}*1gczYm;Iq6`ul-AU`uZYiDT~?3qRd?R%y)M-$^+_p(v~KM5@JDH_byiGeoR^i@ z)6F_-hq{%@swaHPO34akebUL=q^z+{#CpXVYki;&jMcgf8QWHBl~6-pt@VLw-4=o& z>Qx^IJL*%q`aslGtL18~8Foi}w)#N1#a3af4@6OzL93PBtyWf@vRW&vS5p;Fwaz+G zDaOW1I}o+f&ES-32ANA!)P=!R9MTK#M;)bf-n%bb!CS)Lyq9?&EX#H(CdJ?huaepd zr&5!F)06ap=-!k*&_vm+Bhg~;jPzD1%-rQt2gVQEkY>m1`8|`>yluAY;qx3OKQFh83 zxB9HJ);b?(GPtYW>U37~w5@|zI_b0;66%mTYA@?T=ag|eoO3?V%#Zm%7q`NzoEc|O zxWcKNQ_ct4w&GIGy)T7F=}^weV)m`Llk-o%8QE|@hPR0H$r(S{-l$#Nv|==4ur=jHQ>Dz zr6Dm&uTVBPg;GNKK$bt~plpyq`Q!sVi78J$5C!4y$$TjXZ%@8_cY{~GFXo~+jc_%r z1|sj?w!))x=p<#~=iE8@Ku$i8GbimtK9I$`NuN#1CilJ+lZLd`Bx@3ahInO^F zl@#8XbjGBmF|^JaYs|pl34hhqSm_CGHC}8q#Ou`zB#oDqpg2DoABe`BErmBqXM~&Y z1~ZrpJkCf#W{h;Y+sdsi%#&6|uQ6UKTkGO<#&{{wV~lv?Mq63badAOYLfo6ou(G)6Y z8{xW4_&`!)Njzx=9wqcAHAUej;RC@+Bc!NDgt6L)@PR6H3MYgTMhGngYV>pY-~)Ml z@IC15WmpDI9h@#Vmcqg3pgZTxLAxEiW~<3u)`8OoI}{}wbl9N3=B~lqq42h_!j-`X zVi0eMM+PB-55yq!#%>0#RJ6_wzBsIJ!d>-N-(_I+JrI3VQVfji!6Vh%LhL5O8)fw2 z2zEEzjoz2RoO9CDt<1UMo9?E!`R=xmeK&f-t9(ACd{ILA+)2EVOxEZ$zN)$mqQB_k zd!W3v<=zReaAT(je9$AL9DEOyJ5Ha^_Z|oXcWythEMh$NW-3nJ;xq<)zMlh4XGZh zd!Xv7w~9B;s;h3^5}ftLDfLO6r0#({I;0i8D7xZwPWM1Ar+c8sgwy$(?t$D*%1$<^ zlLH<`P4H$3L3YjMY12HOo`uKIN8vEm1Z z{w&sT^rh+sXVoj^XbGpZJ&;e@u!BeIZU#vGQRvfT`fK2W7T=L2nMiiIzPZn&6^X7J_% zx#?|U@RnR0A>Ry)tx)OHrFPDo-SzFGeSL>8=89l}ag7JZFCO`Z|XYqk< zMMZdZ3NHqCLo;}U4^(jotGuBj?qGE>UY}E+?}1#t2eR?K_dxDFP=>Xzrh>ZGse2&1 zFW#4>cytck16fwWpL?LrggbXpoH+_J@Yy!5#U*Fkhr{+jleH=c8bjd!BiYRP}Y{e2kPw0-2+Y8Qr;eD<2rNi#W@{hPgUJ4 zQFJUkNtAycFd)DvfJw-BfwCGOajsP*VDhysiUtu9 
zl?O#-T{!34*_V|sCc2Zw+$q}jb|Q_Hb;i9LQO8cvzO*K~)|K%l!W4x!QKdHqZ!#Hg zXo}L`0Owj&1C^yvSqXQ8H5T5$9M1KhAc4YOz`r@ysv^p2Kn$5oNGc61n=4aFpL4B> z0162S3M|mBRV6?xV~e}VP*x!!g2FP191)5~#qp8j^7zQ%fPlAUHO_-xj2*6e1$cZWv#5ISLjd)k%9_AM2VP4Ac4Odzo9i10t~kD)mWFc zbf!=cMUg`R!5ct&1PPBo!B1E3ij|~o-AQl0lQC?C?}ddwiX>bcUyYT)cWaEV92kzs z!Er>6{SnJULBX+L!g}LkjXnL8Ek|>-RXv7&j(+|X5+(>NruH{XBo~UL!j}i-tB9*w z*jJUc;}^Vw!=l0sCMdLZSy))xTGd3v!O?qK@CgTa1xZB#lZKb13c)W-5IBD0VhxLp zi!~++Ukp)&0Kw&StgkK{rv& zfP#b)F`*my4wc{^NqXC{IGg(w{;Q!c?6crCl|_a#UB~1lX<=jWPxMbhg)=I`rh^;Gs3tc1s02%5~Lx5oMm$~vMcgDZ?l0tr0C@s!3I78G@5 z3rkxLR1P>I$43d2164&iP<<3R5Fq%6;~g3>NFoiB1W!3oIZ#c!0>*xZfHkx4{?1b1UI#wH?}xIBgAVlgbNMTIReHvf;s_wNP9W}@(QG385R zd}TLYXNt;qlfhMGpOS#fWA??=w)bIZiHHcwUYH$mLHSexA7QSd-6l1TJWlzpBB6V@NGJQgq#NWgf71F?EDIQlsy@S6$at%IAXqDRDoqA!SbIkR z919r<8@#Ors+#b{RXN~7FHqH^;D{U+nS_Ohy#ZcfbLE6zknom>pC02*PthQuJPY1_ zeoMsXx2n6*vGCS-v;QZeP#Rbqk-}sbrpYc$37Z4qK2g^AP8O8L{N?c!N(0NJHy=%y z)3FjH*jzdK%j2I&;W@x3;K&>Yf&>Df5C8-T0YH#I00=+;0(?XQad}h_I4tK@FG&dm zz^b0h<0~W+2h*h}OTm(qu;B2ZnDVv2SWwes@e~$`gAZi)*3DgDiqqyc`{C08*k0@# z;o-5lDNk&^)O~1fV~eq{guB)n%V-L(%tgqS!YOkX!Y6YRvbvL{GTCBF^>75cadC(3 z#s%Sxxs3^{H}>MSwwq}X+Qr63&=r>zQ`wB2+Qu6RtafuPdpY>4n2T~*_R7|Qx7s%Q zGB~R(gVGo+lQ9HsdSh_KC$%iaC2bk%V(^%eE#>jPIHY)A+B%l96`k?Ec%#iioJlxf z_r&SM(`7F{BVg(>xZJR_S@~x6!{Ka{#SXv??q;LxzQ7LNrp{zt;b2V0TVkUrUfavH zuY}E)bv0O4Z-`5MVI4e5D4%Q-cXpF)r{Yawrfu(oGuasM8k@a1R>Em)`a)x{h1gBU zLOe#iI~RwsU5jO52+g1^X5Wjm*dkbXC%iwWi zGv3#}45r)g*<$zb<$Z*lH+zQmeWi`A&D^;&Br}S$KEm@R_ZP9c=97prA9Bx|~X9 ztOk@+{rL+B2(GBj|EcWVfL7aQx~4(IDm z=uFlSXHicVV=G>v27@;&wlcOCg^q3YX8^>6v^C-Ps9mG1$UvYvqT%GIm=G zwm7x$zHCLoc65SuH7?Cy<;PbV>smZ!_$SJ0Rkgr8~BQGaEaI?t!kIu8VzTu$3Q|rEZ3%aOX4*-rU_hrfcEcKx6R4Yir+2 z!Z$AL#RfCrv|$bgQx_NKPJDz;W_KX4-5Sf#p{zCW*Eab^Dl6-X)>yi_6t|h1@5So_ z*-X|HPIoo&sWm2!>93_BE+5EFHb0QX`D$qD;!fGBy0(oxzWLe=?jC6C$!bvc@|46I zrJaOvvRdl{NtD(SlfKlAxU4nSTJMIQsP%zrtM$t4+u*F$N~K9 zDrb$eK9B^bHEWx7R$1$mRf=G7#HFmZLCX3t?wVvWR!tRUj1}hV;YBG{SThq=2rI-N 
ztQCj48VPr>)@j@N>I2c|^SOMlz6xp5R|9!fYOk_ay}G(@hSm&Ky82Oc6}oB}o~sX% z#<|K|r9Ne=u~pZagR@KV%BqxgwiJU@H+qVYF2xBbm5F)8&(DWp_CO0Onk9iF3O``r=yaMa4R02Lq{{ThD2Lx&Y7dP zx_4qW>ax)Zi#e7;s-j0*qaW>yajgb(an@*M6oQ6GlhG=Z!4obSl^ETaw04jjqY-@| zi7{I1)EZ2L(>m*vHP#8MtMh>*RwtbngHt*uotCYTE-t1d`J;31X*I5lH##3kvJa9( z=fuFc9we7jN2H6n<^xrHK+Vsk1#*`*= zSqf8e%4W73m&vAdC7Y2=DSB+;i)-SeO0#JhBx;p-OK825)=c=UQf`M8`9PA5pk0j~ z_hv8^iOT9S_@t7g@_`_g55yo**>UuvMj4Gd7aK!F(!Lz|K$4s?PKQ%U{-$(ORu>z| zrp!14Q<1z%r*cZ!p-2bZioGv{+&e;wG|DJ7MapQZYeKrxi|(RqQED*z(m+0nQj(y2 z5K3MjeNvK7GC;T&e^0h2A4rn2-4tG(bW)NqC|v>FbMoRObJAA4w%Md?k|8!JP54gu zYpz{(Lr9awnygGRne}86GRfI!h(9JP%UY!;7?ZWe%2EzatIrxG$z!drUJ6rjkTSMd zm2PDr(WEfC87twBUY5Zfjg7|mKp%OX@gq7T#$_QQT+X;X!YR2nDQrqq#!2bCA5uc; zWXxO#AIbPYcHezLW0WyshmT^65;10A^k(oE;{!p^5J{qL#Vf3|r7zWlQOb7O@qwmo zE2Ib`gt6M-i$542NOG{c3`WW6i}S^gT)tRUdOLW08w}y_ZC+%x7qunP>0;#3MHe$% zd?2a0=*)R-(P@hkvc=dUbsact4Hy{LgVec{){vXo`mz?ac98bXEV3wBWRarqWF;ht zvFNo{9sA(4N-isWpa>R6BCHQnx4QI$w;D~^M$~3Z>55MZA4pP<6uP6(ZW@JFwvdQX z_)sV8%qeG_I)by>i2PXx=m=8}ce72nuL_&6N=?~{R8bU+ri9z0giykLorF=7QFSl2 zZg3`{*9d8ZR7W%TjPQXzicy+6n!#U$F2WY!hDepdyP+pMLij-NLDwL32E9I5=?RL{ z2PI6l)CII2j4yir9+Wz}v#-X(+k+1z{giD~ol`d*lupX{QV<>;)Ousz2|foY_&`po zYJ(4CDcgypI~$Y@#@k+l8!)mOe4uKgF4kQ1RT?hLCGNJPFh!NHkbiv z(z?PQgAp#iS%bM`VDM&7+LEy&ua!>O=6pAU)(+c3Ji_`OD0Ww@A-UBzWCk)Rh4Cs?7rK|8uC$R+V(-pT4Ohuin|vfWpB^f zds1gNICay#pY-J;>0YEdIwI|DAshC&kKDNzZEa!nmhk2_mLk>mCfXk6@zLFwY#qq< zq}QHnuf6u7OW}$n+1nR|!Bjj}4+4((V-G@d>`6$HRO_vD<)aAK+8W}tx(Cuk5oUE+ zZG}`}b^CgdI@ZBoo%YpmuI92~H5bWSb^1ZF7(;bLlGN#4MYz;;AVr{#x}UOb)KT{0 zjB+?vbCEh1B9k{5!sB!g6v4u~LE6xJ>&lyM<~E(QwS+{admw2_H+>}C6s1FHBUruf zmB9^8x(Z3M7!xyFXHDabGFC7?id4n8@5bE>UZJxrr!eD-%1Y=i^n*v}Tt_oxGIr24 z$e_CzOFg&glrs9^qvy?LE=6Gn5=GsO42-21oIU4UG@W!8gVIrk))0pjz8M2~&Q%#0 zjq&C*m9{d*RGc}7wFdGLZOt|JK-0FI=9;q=YhO*~vXPJcwawNR*IdTex_E2uff$@M zCrR0AAe=HcnagCXfhm%nY}ONHD=wLf%rPfDtAR+4xyBr0PDi*l#A&@7ow2g6NVPps zZ4VU1THA!iM9@2;wpJUfmeTe>(%jX}?SV41hDaK#ky@o2I+ELn*tb!MqAr6oTBn3W 
zX^Wgqs%Cp2X|_z(72b5t8WSNoo9$BeA;PsG9b5V6Zs>=q(iJJn_CQejb0=kcpzFa$ z`BwQdxT<}pPL^hrIy=VBr7C+FB!3izBhsaAY~EsJ?St2SH#CEkb+ut5PGO5af|7M< z>ykHiL->QaV?&oUw1DCqB9ql6&o9tb|ubU4)xE(peUs zh^{?QgG1MzYaQpBYZ7yk*S4`5h*Qc>8xv(2KB{JG`cn4IunbnEAs>Czu@Q%@z1AFD zYmv9sHWw*l3!$hxZ?$zrQ&F};GEGrDvJA{cAvHLB8P?;Yd@1UxxMQtVdN+LJwN~|k ze#$lz!NR&@Cq9ZbZ`6yy6OlWq)sA&=R;O&EZ7F|}` z&Zg}Y00MiwZlGG4$w3SuWDGLK009615D^i8K-&~zDI8vSBJJzW$UeV8_A+Q+*YIpD z&FgRX#Ri5rIw0dKPeVaPmx}!6w%dk4aRvdSsMH~%8Ugm!oxo(1MsdT7P+#6nPvN4M<)pgUw}5+0ho;Kx&x^ zQEbEpDb`*ta8iuK51x*Wyj9DbzSTx^Xr@sZ zC56oCk=hVQ*+7erx2&c;;rOzYIUuo2XNw9NxZ!M_N3<%Z@>3Vb{Hv3tLrg3>i7Uwp z&6fNvX|+dLT;z==ewaM6{1Gj;@e7DB@7Rr}LA|+OmR1|*je2efW)tcW?Pxnv+7lsw z4cXToMdY)9OE+11nj|))uQG0D<6xiInrLKITQt1C>lHG)F(dSlIpcc!@ z;P&b%5xZHhn^Qr0rQ*Y_cVHBDL++(rd3(M4q(p9yw9F5ac8|FXLh%{@?H{AN;NJ?^ z-&IG@0^E$L2axu#MIZ8cfOAuN(CbH~M)85b?E|36oza#l)~2=Y%3a+RXq!e4+)}vX zq0$lZIA%;bg&r}`ZQ&@iIV-P!!q_^B&udQApC5>$D`iZyp3N3|80Qwz)cf$;-Ogjz z0}Wr4nJB0i#wj6cR`b6Uh%i#ve1t*}K`$q{tYQOgA34;IWU|1*T=54AB4ueQUHTNt zdVGn3OSEN&eqFF_xz^tdV!B0tQ2)z%cLG69508ox*Mmq(feK9`rMRQ6P4eP?EAo`q zMJ>NmwxnK;=BrElil*wdRg5q&_teMuz;y4}&x0h}klI{AB~fFj}!Pc6G?hHb8<5O>{9x4rfdb#fMj`%9N2x z?as2Yk_@`U%S9(;f#Yd5dcBfTT3yMOVRJy&OR+`Yz}8j^G*tJkJ8%O~W$9p(LEF8f z2LB@~DmIIv|B)hL5;W~gv=WssVZTBr&d5yVNKV~_0H(4+5NU=yON9*b%l?>_c{f*J zt60l}hGJ58Yk^NxATj_IQe+?odyBm7mhWKGk0}n$;}Gtik!`wg9|IUBNOs!$k6OhS z$Bzx3BD)Bm^T$-T9$(_)9Hci37p3v5BtBp`1pAoq&XwscdXi*<@lq>#?DQO4Ic_-H zK)=JcORU-9?-LpQ(1~p^LW5JPd)>Q~K z8?CT8D^e(y%i!gz7u+c!XXnv44%>Tx$p^#1V&>VQjOEJ$O{1)UH3|knOV=Qb*{nyN zO`QhjVp40Ytl!D2d)}Uw&zPlD6~I48h$GB}?R4@il;O4s-zLDd%sagW!g^_B5H%nBX%Ab{TFg*5Gg3Pj$Xx zx|`09@pYWQS3gAJV=u``LFL8`OvXvE?uhvci9MD$)Y=E$j3U|t3t`a2ZB2xTyng+F z#J=!Ru`#%{?CbPzERe7+Q*rKIS=w$)m&2ZlhI|5ron#xD-V&b7tYr_f>R^j(I> zY;T3^=1P^NPb9*4k>t(sf31ojorK-un~F1jdCV5llPUP&0yFti7wDfb4{Xv?4zaLo z?_jm?Bapr3or-mAmcWoa_eGFXs%>YwQ{`BpFcI(8S;a+ntuqH?Cq`K515Ny1vGpJe z#}ZzECOD8)+-ZCYfSKpiQm6;w`C+;dO1qywd-Ebp8rZTX)7(JqTQ z!u*N(2&@&V*mF*=TUGBJrEeviovMIYpZ6W53JqpIo>k&GLd_lu7pVTTXMQMo0|9ie 
z+XT%?uXe+@t7iOGEfCRqVnzUEuZmiG+X|f7nsMh+a3`*#6^OLLlJ8Y4-FwK$EG8?e9&IYV&{i_~ysXn62H0R=@p0%oX7P0QUifY|hP#Rj<$R7T z3UaMu3wzr$X$8o_B&!@0QH*NDx=(ZZdt3F9pU#=w=YhWXS)ifjo-0b50^IZZhJ{|S zrr=xnOENV^#cRff$rhpMmEC!TLm9>~+mH23ydQ-c)+RJ~H@@GOJz z>lJ4(wV?4bv^Ru5fVFB(Jg2$?m5f zwF@jmQ4JosMwpdQ#NMVzVUJf!E8N1ChSis=*(^xWTrHRBFmi-8MDdD#*@+__(Aa^C z?znoh)#xe8AV%jX?$!x@Sn35dDXkJ(42tqD6Up(xzq^vdO#_k-v=FN$ogksej8HpR z015#s6p7ndh3g*%)j5&ik+Ez+1AxCoaXo4C5%`zEs#K&l^g8^}^^Tsm{7mrV=-YVYuLui_`5TLPGlF5GT5Us<)x5RG{%+e zx>XE_*2|F8j+%*NvU*>!&cxZnVIpY39w^}}tae#_{v><_4XSnMjX^m83|}MHKmHtX8~BXXhI-$E-E)WpJ|E( zuua3Y<2f=*V%=vo!6PqU?hEhQ^QK_5rx>Vx7Aen^@NewU{NN3s-oK+lhq%0p#@%TB z_Y27+uW#!_UR)-hRIs2b+y`huiAoES*$g9KFwDtVPVVQEazdaro!*Qbo_bV@`lg#l zMmee831w2rTnQ(|?7!4GPqh}C%fG>zG(FjaZ~)EZ8SPziuX7zK*HCW)u+#=iQtMZf zabExE$1`E&9;)SJHVv%u`%TmJR})_PuwG_ydzJhr+*acf=s^f4DeN#hCOQw2og)5y zSo?6H%T0BWx;e5M%8Za=#{uJF8ZNk1+yE_n4_=T$9RToTvWKPpfHAi>M?gfxqK!K2 z+nlzTphUt)^HF&ijeaS%HcFQ|$7sFCX?Rybu~GY>;eKX=H42pt=gq`vDGyc{H{Rpa_9l)8 zjy4c6DvSjW!Mal&9EXvR9Ml6TT9}k2#}E$pEt)-m|2Y65jMsu;c`2=!6JT9s+RPIe z0(B$I;3*#NG~n`K>=F=68Q6dGW2G~9+w4S02+*kd7W4Mo4{-Gb9V~QC^_* zR!$dS%NUaoTWpJS33}P$WQjQc&DIU!H<#QY)swrnqZqCVQ|E^XF2`#{Qocwh_AxM# zHH&ccxE<+gdb$906|n*rQMaTBhz2AR*NR``>f&vZV^4OU$?oMG%RT195qcvkO|_S{ z*48m6Q$jiZD4okrVla|_=|Fz8#dBJS==oVi)hO3EobfD5D9uRH3K{W%w7M4qa zA_=Fa88t<#u*&+qy6JKT;=$OC*RxU?{k*NT)j#lHuxq6aEmI!6Xs&;4h%t*l`{^lV zjTk-r^|X+duU{G4k&C)iTbJtK9ac^W;odk!SA>mATl9)mR1#E~emnc|-XCIVsLvMS|MDG=W4wx8l(W&4j$Qh5D(alwoEk9~! 
zb_$!~*?v-T4FsBdh4wcx(lhs9I*1o3B@+ExrPa(7&fO15_YqyRd=BU~B+%w}$^gP0 zzq&@sJOK1*$@>ckiyrC95q6Rc;4L3lQ zBShpZw+kyZWVXDks>^<@pO*hI@rE3&geu$A*D&ae78Y;xq@%l0Er4VVt5NGMhwj)< zwiZxBjq*8sfh!w}q*Ot3WItLQ6(PnF7}2YCo5z!0poxO7*#m^!Jp2RF*C z6XQolY#BBWwh=~!r|%@kNQ;@DqU13evIs^A@$(7Z)j~EU53(`ue~otN7IMvyK#4m@ zfCY2E3vLWX<8U9-=O~N13+)Yg&{AytxNl=k-Upas#poDj^0FS#H z!m8^-GlPu?1d^1H0_3R65OlT&ptCh)E{Zyg2%0gDF@fs=Iv}Bg7PI~~ReK5;wwrLH zwsWTW$kof6Q5@yF|Ufpo!dE;y}fK&Wbc(OG0e;Fzsj zReE6olD)0TuH1yWJxsRca3`B`bQTHxJT^wxUWevja9rGjZ=^vf%4n^ghRdgslHK~M zg|wq`$X8TeS<*=3-~mX#w0pp@VDgIZ&I)xbn0o4WXgOfw1JR4ovq4cw@Vp<^kAFA{ z&6lAHdv@d~H1<+JZO2AhHN=##oc9hV2EKWGB&@`ZL&GMn%-`Z(SRUD&Y7sD_4O3w| zYEmBE5OcuJQKH-g!2pjXYmTWrEqifsa1e!+vohzXSYb|^o(&L2&6{{C2`2j7rV?r8 z9c*5M7W{f(vUt>KROAAPZ}mia-mXD;kMPy5mLk_|9ad_2<0Zr9TR$-A-o;<7f(S~* z1db7^VGko{!+23$iQX57apg`BkvX2C@P!|r&Q?F{RsncbeyU|i&qQ^|7MR1f&jlM{ z@fXlFwT=>fg38afLm!Uw+Z;ytqlr{5`n%=hnrdCY@XbzrbQzy<_E&R<2I=mhx$qA2M^sdOn&0Wdl8BJ+YnTY3Xd1NLmMnf{DF>*^{#Q&^2R zrLyWt30(jwF675Ctg#>x4%!1N<^VL6c<;@AEjs$iLpFsNi7uN9K-4L!Jge1$v?Np+ zp{zGKzq>xI#=6bP*8EDQLlC^%6m|h*gQ9qMhEL2Hk|dUeYCn^CkNzircv~=_B*CoJ zTPKzjf9U*9-Fu)Im|X|8FB_D75Z>9Pw2z3!{sYejr&T=IkaF3X17K(~Ft>ytHBGiw zfYZ#b70be6?g?Az3PRo(0GKQ;)|o`f+!S=()VlJ4Rn8(;#8P&6VMf+NCs(=dDX^q4 z$WIj!@Z!Oadm+fk6A&6cVXM>j<7Z{R`9YdP4E_W|(}{97`Q1ySG&BN#(z=P3r2-$*c0 zkBE|BC;EU?NO@6JYiWf822Yld{_PURCsE~1ph7*yUHm=Qy)BX9PcGefY1SHHfD+jRY!!ubeHXh-EuIaHB|t;#bh0`oIS9!+TVy|cYI^Hqn2F^ z_hf}$SVJA|OE)zdA0Cr**bVKGYQA#R8=jry8#MLvXeZHgIm%VruwF#J(Puc;>vjM( z*kcbEEfA&DoQ*01473bin>4-ie+C9kx&{LfhO7%K-7Z*9EfgDJbDy$FQw5RArZ$Mx zwiS;ca+e%)x{ZAMaSRS`NrD+4PNk?K{JTj3Ua3= zulv$nF2z6b0?|_)Wi_vEzHZpxA3+nH`T;K0fCe7fU8^TN(j3+sf06<&%Laj%%Fr*= zxYW>xcu}XR>c>b9kO>hKHiSbc3O(BrPYQHO7)Qz3C{U}zZh8GUs=*B!c6`~sl! 
zRPQLfOg_dWpl>v*ti*J4Rz|B8f0x1RRRs$bQ~^DdLDSZ%3-aEd0rCLAU!Z}lFb)s6 zWcX*6@{nNQd+~4Th20e1W2)sY2(XuhDG5`f=roQ@9L^bx$#9xoL2V#&(x{>853f zs*QUImNVxeL7cSjYw~5u%RZq%E^d8o!Z*J=|7oGe(2v6MhnZo^oQHJ6aH{XQD*JC| zq))Rg=sT-&up4KTB}nNSoeacjU`xXuDa%2)<%RICKgSh8x%%Mb6ro(q5-7DVJ13O} z>&2PTIQqYYAh#=TjXL+a<9KIH-r;*!RhA6g`PjZ}FGyvFrKey~DkAe!+Y&Wk@+R(i z#@g{f(`O`HG{Pe)6vcEFKuH8{wp`rn**txMME?bNz)JVeq5dR5(*9{XEB z^?;m)OM*YdBr}(o1@RqW<9GZn6a-`_ks*ix=ztaeTLdn=OnY|yPS&gTaW9oh8CqY1 z0D3TxJXdqIk`-tw9a0g14PiC#)@Q&G_c27!i~>ffR5LNIPxOubJ#nktBPFo#9_P(B zZ`C=LXRkEy!b>-;GibnO&{5l zBWSWi6to&0i_x)&A}jos^Zq|U?}b3JpqPJt8SlH>|FSFw(2zj^S%;*s?HwzKg zDrb@T0lmzZpW0vuv4^6I`H4Zi#jEFfh(q%Y%mnHFte>u{8W!xN>EgQjszO)btk*29 z(<;2-W$0@-3GZn3Zd6+Px)v%$vWCp0`8DAB)Vn$b&zBKmhka|Fop1Mzsg?7@j-}Ue zB(BU&A1)XXd>?=BHiCfLl%}XJf+&kuSIz~y?@@uqx?q4wl6#+W6qv$QmJPB-|;pCn@KgjBP8Fh0GP9P~>(|1ADp zvS0L=prKTtv_nr;-u85;luCmD4HuMb4H$8d2e@Hvnq!sry(numq5jDYkVA3O?D>dG zxDgR6rf{E((=WLPF6&qgRRiK}1dJriz{VBSQ3}<~CvndVQX^5i#0un<9iAxTZ%O~r z1m8=Q(j$RHB43k>+St>-fs<#P-o#r}d!zquYO_BYV0IMKj&8$in@w0rgGExDXdWrb?4vf<&-q5-8%tU_W zz)1c?nW2ifi62lTU%4Jp?$VWy@J~-W`yb_$&h2>hTcM-MXl3W;2gy z!t(A{0XS{U^Ye&A%fJ0#_3Ki@k;pN(*%Qg-s8Ohg>@q2A%K%#b5c?S(W+XR~@zsNT zBebqYVX9lV<)AKN_}C>40AgO@e@FeAYTgF5+#GLJ2-wT6=7TS{ue!c35w|ZYZE{he zZ&SUK9msKnfP>WKxNUbw4e`h&L@7eWrglhd{ZR@bY^a7D*%DLUKek~qI%IndF~TSak(zv037!~Sr)DarLjRF z{26KluaXK>HHCgid3=4TL!wzt{6MS&3i(O?O%t?N*)QwQ`VUROO5h|b$e~C`EAw)dk!&d3E&Op?Cl8`}Yc$gMB$dmri!P*=Xhmh4haH&e%Mp0+I zFZ#t3r?+vEgt{_ZbCM?;@`jO>|FBn;?PxTnTwpS4*VZmP&M0le+k=S`nS2aaykp#1 zM-u!|o%AsRgpp?<1@eX?(Nr5S{0VE0b|#XWIK-(+=)&&kF4dg&hPKJ0nN}O@_h@(_ zK9J#+UIVQF#CB}a@1s(P>~yJOCe(nji6ilM&B%<+`8~VLA^`wYG!93K&#g6EQIC)& zdR@RWf`~Z`Du$@@=7=>gj3RI9-O`P9ZR?VK(oESCl;VYnnM`K(8YKWHK-j-J%kPg^ zoYL^86>L;GLk7GSW9e3ig-ukobNuV$gKz{40N=s0CT{i?f*-pZl^KXKpzf7w zwd__9{X{<%&}X6)+WM>*Wmv7zYwlyZNaPq$q|M0@H5#ZH zUb+w#l*ivlvF?R}Qw5%827FXQ7YZ9K;ynVv&B0O~y7HETD(W^?$zF?!+*Rony;>3h zTP{Tcv%ovCt5i-LmoX!iGC~D+Qz^+9L7bCYd-PP%+@vPUKP;g@RC&6oPh_;^; 
z;`e*YQ+jGsz#d{yxRhdXW4$HETsjbY?eL)?6Inp1zDVc?NkTV+3~S@jG#OP0aQsw=eR)2ATXKrt@)U~S<9SJ-&y_p zmc1Bm%yP=z(E9$*)?H9I%ht}~1aPl)wnq7kNI4@hmmEvXhRSnMCSojN*deOr+KO)p zT5|(^VH!$M-b^Df{)ue?Yo%NWw_+tk;+;t$dM)Pw$M&dG3Q&a3La_1V6ea(uy37(n zLK9nkPR2p5-?4xR{L~_0*cBr4Fk+zgK}#M@`jgv!Mi%{IR$)-Vjn7cC^v0r#7%V-v z8Z<yzPAIMgQVQ?N$s? zs&-?|px`F7UoF*oa&HzVcX?_k@r#Aj8LHp#k8FAaq*qfN@!7-e}SQ4_$@OOMbO zp*B!CaCjEWY@UYEmadduLpMe7RHzB?mgqZSmUUnIwhT#1-N^YYMs>c!n~O0nyl9@j zPXWv!V~sMH3Cvi>Sf_4Qauh)nWpA@RI6TLaV+ui%;kb_R8il#$QM+u%)03ZkjKe9e zxHc8?a8zW}3Kxg9tTD(z}C1Rp#Gc04>C((I(uDa z1RAp37l;saY-X60yib?K-0p=)KW5m)7gky*V~-I8}nmtFTr%3nTl4_@?zhiO)PK1LY~Ip z3Z5bTS=z-N!~n9~|5Rtc&|}pkgxZ4A9;Te`4UsYaPMWP?HMsBi;iw&vtmRs)tKUhl zgK-9GCm{<2>H;g!8YuMmOa|7gl-%OaJXt2#@Th)eJa8!Y|5ZtWvKk9r6>{fhp-M(S+jV88iVvs8CjY#ihCbs$}U1vr=9PVTEK*(=yu;m1N(EhcsUz^3hr|6*|*hah_ZY}RBnJMv5lNE%l zh20ny&z(qqVXbKrLw@62TGkDHh$(Ac#va$hDVonz&VY9m30C9=nZOYwXB9K!xJ?c} zVj1NKqMVn(I4P%us>%XsGX0C0f8tPJ3LP;5lPX3Gz2T~SbCV3w8EVHQ5J%^NV!1aW zt-gLAhFQ?nJ{>yzWPMy5r0AN?_mjbgB>Aq9x&O1`i=ITanGX5*Ib!|YlabajL8EjK zuuB-J9>(z&>QJ?6b(5X8VZhk6$|HB{H1Kx2m_g3yP1YQTz!l%&!hF~XD!b@UvLpPZ zVAbs6sH=duM!P3;6?9uI{K8x*q>^No5QHK0xD!MTx=BKJous!bX>DvC#M8J7_bjiO z@Df6d(VC-sJn2nD=y~Mv_4=iXS-t)QRf1n%n8C2vv#T>$z41y zFd1*AURLf9#|~~CL~wrKTW^Cq6EIp->5%0J33Y(X(Le^79AkT88lN3X3q5H@;@b$ zh>Jj9S0elPO5^5w`5>Y5x4!!2iVCj?>;-hkNRuuq_>|7@{M?t-%lGyr`S9G zm>?2Qp*G&SzMX5|v+Me;KbS&Qdun{@0+?2!^RGkK{5W;(6LzmvDx|Rh)?>#|{!f*W z2r&YXi7}m1V5`25Ktqc-fPkI^ALgYz2aa?G0Bi4cz*u2I!LH>TraIej{i47|d(yo( z3k?1RJwD$v_^mYrYlldZB#1mr#V(7o+JB*=z$*sYvM-lCJ8kk$p&f@d4JkDS$HQ zgv!Kv$@{tD&#VUTbQUfOQDNyxM!@VEB6fvgA(cZB?uud-N*FamRFUU9^1Uh=ax8k9 z!AcoPaP$uOoS^-M!a>CTM8sVRG|h2x0x?21RkG9` zYuzW%3P*tMSpnwND&<6k_Azhtb6-b#Q2C^JT$&_kAiR_+36e-PH0A{Ax=>9eI(#@C z#KR_rSI`D(Nz?{V44@Emr(**M5KZ35VFm$=MT58NAwI=6Oz&1TaRj)?2Ih&3&kX=9 zlFt-dx1u#Y+~XDD!O&se-Ox%j)|ai?#5V$UDpE`-%u8C6zNB=&Sd0NIL*}&tPjdR59^WI4geo6R66u_{4~lj?f^PDkO$#xsD5U=WwBLM8Ade;yQ^7)-V%03WTddfh%F|3`3RB`_6U>v2OV2)kx`~aE2+^92hW}2l+`Z 
zbOu_$H;wdqy+(8R23@Koa~?^-A?3s+SkG2v&-46|l=+T(SS>M0mhA)OxzkY>m<+N% zk=Ga8EUAZ$D}1)wR%l_drV?N5C_3QRZ}OG7}W+nsV!0U zOfC0GE*Mj6P!~=wzjnz( zTgX0JiC%auL9y`2TYWiPR5G4R{CYwNqJ4;y00>ZfZOHPQnDvM$e8q<82vne2nmQ~^ zmu?ZKW1>$_nBM8N!(o=~5Wm{U%wIkT5^l|p5vf8HE`VM4OjwAvQL$z<93RL4@?dT& ze9iHQH)^%F6#J~aI`|Ny#4?I?Urm12e}2F z`TqO1C$)Y@)Y6kA%LZqsn3gC#BI&D3PS?&-t|u91eq{a$K))}ecuq6|(9Xykb4Ir7 zx4bCHUq~kn=Cy?-2|^Mx63SfZk3obQ)M<^`+sp@5E-$Mztn1hd~P6pU66F|Zr(0$tZ4=__i7ZpN(U zRb0^>f(S}7YRQrCHA&!cb(Lo@UuhHrw4IKMt_3#J@zVLZyGf|x5Z@Ns%b?IsuNOPa zMH)U$^o8QJj;z>E&(N@ENC=24=?*>bgl8U9S@EsUkD{KC|Hq(0!(3Dvt8E_htCWo< zf%#Y4VsZmt@Lq?)RXMl{==;zrFQrySa3_(ko8!bLD^(G7b?CW?0y9j$A=n}~!Bh&E z5G5OtfGgLp3Cz-*v*WCohJQqyXq3|`mg{Mna9lVpv8RE z_FO53b^x0wd#E^WXL*~bc`pU)cFM*!FG+0*8WGYAl6q9}+hCtcDvg#7aUWY5hnYVJ zQfo1J7DD*9snJWKhpMYPm6=EuMVL8M{;n!#!H#buII6-mR9da+59~V%s)Y7LTKHOw zE)1oEri?%!UC0WSnqfI!;K-|wXwo8qisp(4Oa?K4Z_(r}s}SC+tMm|Yq?txa1phC@ zG_xeOCG}Ad6*h%cghD+wQwT~0(HkpzK!`;Lm8D(^xxjx)Eem=jc^s6KPkN3vV@T0T zIIaYl|E&m2F^FWgNb3tq9FY$GVMN9)xXjirHbyIT+qNasWh8|JAk7_ku?;e0@> zkSbyH0|Z=()owIX;9v&^ugQ#Oeo9eDMvd&7)p|fDe83c4p~QnMeN-2u;$Zcab;+tU zXYq8MB>@6`}BDFKqOxuQi2gP4QdF-aj@;SHueL1|0Mlnvvo-Bhe4yn5>{U7%hpa40hJt&RHu!7cfID=|)hf#%@MpVCpaLI3`KVJ;&A zaEEWXSEr0Egt*~-e){q$I0Z*-iqsHeI{T@k4p0=6$oQ2yP%e-f25)S+QPF_$1r;cu zuYwhDnIZsGT$;3JSkDb(Jm5v>(n(Dm>_$tNMWp}f>M9kCt6)+EL$7sL6@Zvsjq8A2 z%59H`SO;$?8R@u;bl4|Ra3u14bWlm<#i_*0;GQ1I39FQBsz-fCG% z%!t>woqaSLF9N#`%!T+Nf=s>qy~!K&$;kQ)LP%g3l=f1^O5` z=qr<%fw&m4z74(w+yVPW7$vNxYXv~dn>=9riqAa_@KCLS)KUgDJ~rFSn5u_@)=7^a|0|C3v8K=E zh;h7FYspP<-;UJ&aSNE)$)$pDTq&3pnnS)>5L@{Oa(2BD)5kR2eC6fOB8z6Bj45RyGZQ`7((rs6c~5-rCK8hZrDy=xoL+`Jrw zSOy52-iMO{oe~QEXTjQcvVQ;6C+)F&2&QT|=GH^P;Jc{D9vCrjmH(BKZKg@*ps;Fv z95E-dAF5+N>ev#Ut1u=rQ+QDl>WVc6QVVc?ri*3xI1mLIFL)vbsFzUodh+R56nDq0 zJy-XkCZ%JQd85OF<}g^3sknY% z)|}XPxNkKmRH{BOy4mDU81AD5@S&yrGQyo+Pe}R$&M=dej`7OW`pd=y?Gx(8id(4r zo+d#?v(Rg=ipzlm;#bRI$0nNu)`DQyceC9=Y}RM&61+?vGnCLAbD5mp1K zFX5?h;0ZpDX+IJSk+OiIkWG<56A8?QWKR+m24PxKmEg?C{FBz%!@SXGmO_dAEcAe62y{EJSIifZFMJs#y)pw4DfODzgZh 
zvfB6(PAntq$}3z{9>ZaPnX(F)xadbMB8&y?!Bu!__Ce%tZV=x>0>E_;CY%R^>zbgF zE$X2$hcNff!r!Wh*|PXB$XyPs^qK^KR^WUu=xF@8`~!IaO%vwJNhnk(^LY~A>75fY zk~(2RMMl$B-ZkYbr24deg%z;bq;2$>6|Mqp2=zR))A~>tLM!vG)WALQ@UMg^f$iwT z5$cT!08bdMX_;C5^px)9x*^4<9J%p9fwuCSnOvtXx+D}((k*C3Xcv++^TT?hamV%d z)5;xQ&AC4kRK2g4%&KQTzrl$Fd zbXAnWj_EF{I$>rIA0k-cm$7UwjEyLCzhtbQG>=)DeU?q6o0ZU~+qqvN)i9B@$`;`K@;KZgL|+ zftYj=qpt6sP1Q$v4;!N1CPH7i9c++>9xLDpQAK1?7Rd{juauc&2#YrH>4{J}5qKEY z)EK~krcG7#6k)qjnJ?{u5jFsYupC~M z!fGZh;0G^mG~{Yf<BB7RFIaEmXqCnoCLWE^27AuTwcRqjF?$ zlL95?oh$nh5im8_Ifp%L@q7zB>_E}#vN}ccW;`d5C=>t@O(|?3lA{jjv3_Bi(U0PI zM=)3hLL8QhFLIph_?#SV!RSEp(!tQ9MhTiIiz6wpX(IXy^2-U|Ym!nV z1X9f=bBu^GP-Ca{WilkdsLqw1wjvXOV0?fPgUdxsSo%;%h4vXTK*&%=J)+QD{-L8n zmYh4-Xk<%%eLc?pX(}{CkSo}(m+F6tF5!fg zTxdGeRr(>?JGyCthhrrtjE?a`wK?&aj!J*3;NZ z6ag=z0h@F28JYr0RU<5f206pN3e*JxSO8lvhoFnF^VAgqWEp27HyQ<)0_(R`fzA*y zme~JU%R8R1Ni~nt#Oe#L(S)j>5hQ{IW-$hY{VA+-2 zV_?|Rl)B$NjmFwG+dpN3Q3Gd3VJGQ!%FUyn$9xJAXvopl!S<-NS>RZJSFxj{$B8#P z?h(|7crwlsL-@n!E?tP^Novzpce}Yf$1Zb%&5lYDtWw3@23bxe(S=$P56-DgWjH;n zxYHg1MB>>ojvZD~K)F8=6Y9qYHwON2=u6Q=5|SsE3|odn&ujZ!oUk7K2q#RYe}!Ve z?ES$=8b{!bpbif! 
zoM}15=ninX*1!d^;n4$dgPZph@sW!j57fuoNv$)ySGa zStcCOfkl4lnCld7B=IYH2o7(0FLuRF5s#;Xjl)`H1woCqyM`hnH@tQ_@#q6?<`xA+ zqunC-zV)J9H!8^Wqnm;KPnjn=r48mO6{NW08!ct$PX$3}2GG~%)HBg^!|vOfTRBMe z`g9{4<8Oot7@pAB;w?jpvm&tcU$hwjBNEzygjl*EuLy;#tgHd2>#T*4eX#~e6LRHx zc%0xZ9YoJ5xfJp*)yp-4&H92iXjHP9_<_}A1SF*%!w>@F9dS{u`r)dvd0Yt-z_&M& zgoF&f{n zFbJvR_%~9&OZ-N-1JY;+DZfmi6I05a_tvTNkY}t($1XHR0BIc5@v)amzwPqUMqagtn;kp8VW1k@kVk=fle3~l_HY@;b+BCyd1 zLf#+2`Dy$e!(#uG3y-fz&SN3L_avXCG_jG>u%sA|CFW1WTMh!rHBMf3@d^ncq~Gc~ zQ7|18tS_pAQe)Rt*8NH_no7waA>9%ivA)8r=H-QO66|%KgJMb&bv@-iO}0LV4r0;M z$q2t9?5c8wNa>v6OR*PbMLv0~*`gk&=C775s}7^3)5vJ#?f8=+tCxYHN;mUWlyqIB z-UgMlkbVFW=~FPGO%iGXqCT9T=e5ba_(xPa+H8`|^9Qej-q{xwj3s%Ht?FJFNpyc| zkDlBEbdU~0s)L?$5TVsBrKKv5so?(ir@v%-I`K+`GVh#xB7*WxkkPCeX~i z7BH!a1GK5RPG7>9_QM3_!oo5BhhGkBQY5AZbl(J?hodp>*K&S5pR5E?d?|9k5f>Yx zlEt`KCb##J41E*UIO7Mp&G5x=XKusW*9ypT&6@`+_tJ#ZZ=sNFS>6XN-X-n^t+Bw> z{Zq;!d+HGi^=brL7N*kcVb}ulh?TimKfv}etYmD59Hgvjp-d!v zz5a4ooXQ5mKxZ{%$JgQ-kZJ=>l)a>DghFp{8XutH5X$I^o z2EyuqzmJ(KpDnQ#7}gcmEYJrSl{>hK?wVlwUOO*pyKyGY=DG^96xH^FDTB zF*67@<1V2(j(IqOa%i#ZojbZKh29@5k6SM^E?{vu@@sf7xI$ZzRYxg;&1ezoELHZ3 zc6n_OC|(mG>P`FD#WeC+@&`=P%9ZmMYZ~n+WV{nEvQ&jqeTROE0*O zy-op950c^LcKb%jl>I&yfbZan|F9O`Gn@!uwHIi6%{;nN`$KP`Qv5`y#yFez6rAy; zwz-46rA}+kD)@4+;MyAPOlJZh;uz>CR?wZ*9vGNb$U0l5zlJ|ImaPrNAkILBlC&HQ%pjH( zdb`+={!sbfCLRSt>rLn&YPawO2{|wA``%~|3u8kQW*B-VrvJyCxODLV&~|WHu16No z#l?zaVQJT)@;dD`!&>8=(CC9gFfei~O_2t7LHw+%oS_c|4jm@FE>k~VbXqCV-Nha= zS2hJGunREIk)T3cg4deu4#JIbp(I1Za}jiXc)Q^9_E?yKVa#d;8_nQjUn5$>1C6dm z1p}h@78c52Ks!Dscq!FfZt?I7Bs1Z{Ptc2ScXnpelmV5P7cW){Spbcur2Q&(O8uo8 z7S>H~QR)}+S|?rRO{!h3K63AM{`b<1Z9gYz*x>c6i!EvN2J`FH88r` zQm(oZh@ItRxqtLDJ_r7}#k{h|zZZegf9mKEni%C3@J5uKs*&L^r#S<6O5xkdGPigp zj&+{?VPcQdX;X^M>=J$h5st)vd4=nfDx)f0g)SZrHP{q1TBTP=R7K+o*m;kpT z1l-?QdcKFGNa0>EwmT?d=T!0RJ*^nV6R9Bhh>RbDP~YX~iX3=kD05JYpEU@HU;`Fp$otA6Pbm8>FA z(GSL99a$8?BRa~KmMVfOULZfNG))8QAIrAd`{gwu7`Fp)9LYFPiJ2z)`}{S%B%rBk8^v1HO4n!T%Qr zQ|#qXlpW@?Uwx<*wE0C)!KWH3^r9m3&1&=*fZU_2s`ELLaKX)^IJRw-{uyZG@BZO3L;HcTs$4SKr%ts 
z11hFL$5}!_@`mUA=U`(auzct9)&khzP(W622Ru@@ev)eRfXN#l>M!Bs$`h1S;VUR> z4E{lRQ6Ii^QU0u4Bo5eV2z+pa50n`4ZqVIxMYLW$Jrb)An!jaYiliG6EQ36zUbAM7cZmV_W=-a) zF*k04a?}(ocw=nCv%jN>dM}|)DVPBP;%I9G=(!_&}61;@=-_^()VMjx~V>$|H zz~p+zlAzEiw2Ks3wGh~Z!3ma&9ithG;R8w0Sx(ryuk+~jRrkd(e9rdYXs{VCz)q94 zos%}Qwu7`JU-7GcM+eP-4*S6lz;%NSR)?|SafRB}QG9Zloh!>5V4LZZ*ma=G=*hJF zEd*Drt0cSCPyS-5`h-9a5f4JD2mHakuF24RtdIG=5dO0sJ(&4#jBwLwbd;Zs&dDWb zWeN`!dGzVPx_PH~eKwIUWZJt)^ndClzg}sYj}09!K4p#4rOfjHPQt@Fe2&DpsSt`} zyxwu(GGG~|J$>+|kgBI^jxnYuY;q2Q8%!<7=yf8nzzhMlRMAScR6ofa0n|{o4~643 zjTG6;IGazWdmxsLxBp(l=q!x|{s@(TIQqkZj_@^d;2LE1DY@U=HQ^6k9z2$TK^Dbd z@LFm^fbU-ioN{EE&P>n9YLSl|I0b>~!eTT$4>~qm42Q~%GP^e^rUQ8h2h&}NVH7v8 z!;x-!*9eiVl-Z$XvO|s8R+8}#n|PQ$tQW9hO&k;kSLBpob&-@vY7CZ2k;0bYc@!IU zc&k8GM2<2#-@Q=6chg;IlhbipCs53-u*@J%9WPPWMPVXweM)iubXoAO&T|$!Fx)`2 z&-$)U^-@`mnviCsG^|rfe7xgJrrt1GS7V4vOJpW8yr4~k+6dN78f0q$+#nPhyyjtl z`XOOUJ{S!u4{^&LBZee_oY>oUtI4~|<<~rFnJWT5jFAEYvm`C<%OD@`z^iHWSYu^_tI6FC6rMhR1XQ%*oa&4JU{d6H>#}Oda#G5Ocq%XL`3Z zq-=MWVst`@KK$w3GY(Pgb3M=_(Ls!d6@gfzlMJ$^la`u621Mx}3i1lS!q9yMlb4;4 zB-HOyMMze*2c~>5ahuI`R)M3QI|Mk>veM@_7Wa| zI}QjQn~r{1n4|@jbO4J&=y6fDT^6g@O7MWWBav1D=?k~>d80FJA8A{dJ}>*VDrS zg75e?SnA4Ilju!tgl&A$kIfoGK?9-=vRhPiubdsC#2^H=kmc7EQWrS(6@Ar3_b6LO zIT5uYvf>#xMjl%Mjz3L0ht#uAl3?NZkIjCe=&Anrw#f;~!pMnKBmCMOfPg6*Ub?DX zd~RNVn_fkAPR;fdI-u1*2HDA%A^I`|DX= zT%ad4WeVVZCnMSepM=N8(ylU3dM**kV9&%d3QbKE{#)Ig(v!67l_~iqyS| zj6i5bg0mUp`!|M)92pQXqE~kinX+K9h)ANPS-Lx*Z)3c|8DNP_cXs_PO6ni*r1!{SFZu0NP%T#iWo zk9wyGic)AxU-VP}>^HBDuo8wUTFw>~YLQzG@OW4wc!gyQ(e=?}Vn&tNY+I2fsK5tZ zzj6uLKsMMs&)=Y%X>+a=8_$ZZeL^3%mBCfn_gurr+c8f_@r4{XV3R0DnTIL4e(SR| zpQN)P7Y;Y9(=6fMHdhdCUhWeuVG}4u`=t8a==O-qR$cs;ibykcJqAOeYaDT?QSsq} zeZKg{%Ri#wE2^LqsK*2IP+niMxZ2IKz929<;GTGZ1b3jg(t%$99w@p)TZkPdI%x5M zy1*!jJMo}WboU0QvG7)}bV*kZ76V8!4n43T%QgumX{(xjO%NIf9%8N1u((C>>Y8!* z*)nbSm&?EH}P;zT*->M>};09l`hL0T~gvvxldlgA z5TgA{EaxOdMDAC%Axmg7ADb8iuUL7B1C6j+kEVpsSpc{Si8>O<8lutm^k_&OGkf5s zeh)RBEh|ReG_KS|vi^)ga+VhK1_h{o!du)v$1oTT~Ur}Un 
zZvNJmeCR)j7_*}d;?r2#&2#mr7*Rf({+n}PIo$)=&JwtW_pbq{iG=|J4(7$8W3dH9 zfmx0G2Z7YsMWQPJu`p?vq=Y`T-#3y9ctmw;A~5ZWLV(jHWEofgaExzj?4}@mqq1)( z_nonTXv|`qZ>q*?wd$Axgix(m8l}m|t{Zs)`LxaAJ_NYt>DE{yJQ> zA7R`jjMwVl*2p+rD9tMt!vC@MeThemKVz(}+*V$v)Z64L&;_@V~Kjt=&L10Tv|!b=4fAy;1~w}m$$w!xuf7;-!!2Pvh}Mg@5ga_8v179gsF zEKE2kcyn_LW|==DswO+eAb45OV_F;{t3sDWpK4&NRbi;ZL&2H34&4Fyz47%{e1?5B zmE$i6<$C8}OtN~vVF!B-sY-V1J5D2j@VY<;$5?&0)Za4JBsP;mU?`?NI+&C+Lvgu! zqae}{2x&)ZTCnqiRy>ldRokms)q9go!jTbzk<7~eKO?7!a?rtp z0!VRs=P*SvC`y1jjv#C%<^Q@_);T@s*7;d+xPsAew?s!I07U|J!IH>lxileY1}L)y zi!*?lcxg=NPCm1)i1-mP8tB0dgz&OiF^enymt^1Q;I)L;n`QsdI)ztlT=o0uZtqM7 z9C$)T1@j`x4y!T3MC1?f)yTs7)1y4l!e2A#KR_ zt@R{eb6fupruo^+CVFeWP2Wh@8%yQFYqWYU7}=B#xfc3~^9WeM$5a24P$uGT<0|u? z2KQn11G8_54inu5RPh`z9c{!Z;J6^=6*V;b-Ku&HsAh;=r^Uz7u>IkYem?P|rCmXn_LT+Whs8ow;N-?t<#mG}RJ=uBU z&ZMI79Z{Z02)V~C2|VwLDLNRg1$`qzFRAi}$!UZG*@4orgQmicsSrfmcu~oP%a}B5Sn0k1`;0}ZhohAM6pM9Tm+uxTSgrr+Qi*N zMRw`cn_P$WeeEr#04B&Afd?-$G$gDEm5AuUh;`f}7#k(UUnfK3a)!v*P(!B_dv$bf zF<+1-9Q5SY=uXkz;a8KGq^%)FTI+TGO@F$wf&$hJtta;%E%tw#k?YnqOY}`7bV+oe zyHPG_{NbASRDij_NC^;J+<0v&EXoo`WR4^4nWvW*`*WH{E6vhPJ$~FEwwT{;^3r$$ zoBDCG7fWwUwU1@`C=G4inRq3r?(C~xOA!Q#UIM$?=Q}RQKq|wiujj^HN*4G#$Uvf2DCuHlQ=tJ`y2pY@35SO9`QhF zKsCWXPG7@oEdcHdO=d9%gyGUe0eD)+iIpn-wabG$cybi80Eu)7!|=z(lE--u^mSt? 
zqne6T5T6b|g}4p^th`?Pj60>hXXvwva2&Nk1*a%)%^<>A{sWon7q))v zX>vRifH@gV za8d1wuFf0+Vz9K4$-#^@P`k{)m*X?o9xyQD{oFIpdI1_!04quj)$FuFxI zIHvPr;|W=)Tclbfr#P~pG)IY=#5VN9MEg4$8$%GQ)&ET%8-v{=t^cuwi@lv@nu9>9jBj5Xm0En>Xc!; zqo#TCgAM_BzXMm%^g$`QRn%_!<1hy+aq;e;v4x)Ag@ zR#m8?!%U0r3o#V!2e0zQzu8ttC5cPLjWupV;j}qabtH5`8FN_QeGc6g5n^M(DFBFy zgDB7;k43i_nr5-+f)iI6FmZ`{8>hLk>QxgjaH&MK+sYAohg|0EyC;Lzve#FZ?nW&D zGBPd@^x*~Sc}*{ECXvAB*j^hr7FM&Cuqf`!@Epn&s8sEMs&A(eXc+zdKkH7Zh6phPcePBfrh zf45<94H_U?CSBw+2+vhu=Rh~6I19Uf+ZdeA1Q`#VL2f|PJUF`Tq(@VQy?5b;fH|)> z7%hG%h%0(}Q}hUW*lE<~;WVX4+}FYDPCR(He33s+ah2~jJ{(zEU*CiQB+{1`m;>6I zjEDnBexTZ0q`afButfpFnD;e%&G`4*<=xoGsL(uu7XaupkWrG*mdX+QW*cB=`yu8cWRuRT*S*6?Mw;oHOg0IEpqkR~&?TfghKCO4oUwuB()RyK=^m z^~)@gdW6%eR7Bcz4hS3YO^BWX`JY>CnNW+l;mr^TGCKR+Iw6&44(#-wa)QT8*D)Rz za5hWHTUOhy#zmR`c0d9%E618#g)ilOKwnPo`Hhs8qs}m0rVP~%Eo5_j+58mN z50eMb0Lye|pv&j2zgFbBc$IcK?Aq&-4q~sT^oIFqq;CCV$1b7I;xWEddPtuD!v#RA z#O+}mULJ0PeJ8Lt??}4<=l)>d*AENn!6JqlOw~f>&i{khCnGdts+UA|lprNH=uEju z)5yPNan<}BBotW?1V}3o*iUc2R7ZMQMYIlMvgex9qbLd?v;tHI)bs6QS7t<|`r|#` zv$dnlG*#c~ZmEqcn#QXC!p}1(v(Op2&E}a%PcYm@xTxNW51V1cQeGJ1`UO#QzvN0ID*CvzdWd;78IW#{1B#nU) zXZU#*aqWQzJ8=)X8@?&u6w{SYp99?9by=u+niR|obGq~}li`kn?o{@*hOry;)Q2#O=8d%sX*){hFpGKB z+@uDHgcp4mqxazc;X*9y+_P|4tj0}KZIjJowJpGbl!Zqe?K>(#p&MvNdP;+IsV;Eh z9^e^MIC(IOEOu1Ql3F*KwHgBFBQal-*m3<(Us-XLL}NcjtDh`f$N5GaXKXJhn-r1u z>c7U5mTfELDB^+D#Wn*Z&1@kFe4$x(qRrdmtQ6f@kP`mC+3L+KR=W&!xeXuM@}VaX z=MR^JOKs$`lzbs%&BduT8<%qxzu#PGGd-@_|bKJ{xXd^Xt$|>x>G44eZLt!;mofy2!cY^OXBa!J%)DA|5H zE(DV&($~IOWW1QXLB|w0|^ z7?#loChn46RW4z17t>b<{7=bKZqZg~JVFUC-#ot(#yvqNkf4`B)(nkIF1HvFyU2Hl zq=*B=iA)1Qh5_C))}~8DR48<6>zXlbeZ7y9 zu4v;-Zr!v+PzM|JH_KLVN>D80s0)M=M6sRY=;;zyA61XwBxV4BK!3kUe(v*-O$3eh zLc5qUhAqJCKFu>fOajK(w^Qf6zyUCHUU^2UDe6=kdHg2 zPXcM$9dcZjI}dF~|Kvs4#3>8p1=3t9?znSgL(u&_Qme?cMzlc|q5X(m*JV?gP;>xk zW!JP5KQQ02X?=FM)WX^IPa|)VLh6u617*%~f)qX}R@mYP)B z+6Zbb1GkKJg#qgr|N5j!zbYdrwy7o}ayaXiY-!S*_kPzs%wmS1YgDoe5MXUy3-)Hd zKx1*q1e@B~+@NiIg~xWEF@dQ~F`YzIxX&Z;8Ge!xfQMfXC#7d205I*N#F<+m2 
zu%8)gQGCLzU;Zi~)u7AfIKD~(gD5pImxTg(VT#rqiFi3V>Ir8eb_~n86OmC6iY^ya zo!YfPg0UJKamt>&(tk=XQJCa$9WlIu$e3V4c8W3$yCF1u-c{WEA=I42(W^(M~h@n0e>gyk9KAY%@2wPP&P-oEx84RMezpR)3W!0KRZ{h`f(>;AA zS<+H{_!1l>c5JWW(~bC_>!Y6YvbK^WIE zsP+^Stw5RxXt~Z`bfYO%D6gDhGAqXKHpq`;v!?TabVV%-Pt9u&Y z;CH225D9}gCV04WNWt1>u@p2;)cPv#^JSV~=gCMu)ftpBIk2vhK+iwdhZ~&(3Jnj0 zD2G*Gh2bdeex;Yw5hVChtA*TeJ{vuSo~? z&dSa8$G5Qhlgk2%k4yzQE=!T}MB>b3DGKc+uND`TYo^HSkuNlm!Ymx+r|~YFuTD|P zBotdLS|BHDKS>ni@ntOKUX|#w)KlNfc*TAG`iulR{P+=SKUrG=?B;vxC9V3_q~#;W zamj091WQH)pS@HkHQvQ9j{+NZf4^fT&PyM34(_`j$=pDOG$Oc`{ZN<)$jtpf$+q3% zkq{Y@LAzt$Ze^U41fzH|Cbp-B`1Y(bugW@Q1fdA}1^qcfNl92GXHlqdd9bOKkH z=LiXFm!4@NEQHwxLt3NRkRcfqJzgc1{5gNA@hlbxW8xS9V1hiUA?kw>Zi785 zUgEC{sFO2|vM=MxqCvlVNZ<|p$a>_Y4rMx)6Z{eyMZGDg2oBM+A0fwxR_aEqYD0CE z0h^iHuLFQ_PL-qd(A~dAzZPUte+u)1tD@E+KEkwyy8ShXVPCitH6>CB@>?|7wOE_r zAK?NqsI|pTGUk}zvO3S1;=a?)o z!uQP9-KRV?6Ela>Nv)- z%VRCI1{S29P3y>(%l_5no-C_Q$@$9~a75AkbwoazqFOL#c{jG9aOSM6$cY8F<~(wq zRbr|M;8C2~=)7_1l&Apdr&6Y_ltpiB!fN{r0pNmv1SQ}YnstSby<5Y}rFDnQ*{d}U zpgnT}H)yc>ToN{IB<$~{1D#4h{6<2Cp4hnoOTHd@smIbLqF$_MinBTFiF;x`}KTl7FXw0oJiL$>j7s1d4^O*YOVcsI74EQZqwaER`UaYR< zn6*bgsvTB=L=kQ|Os}KU7Wj~#zilz95ik@OF`+_yBbyZ$Q?-H(l*3(AHfO2j?{gp} z`RZ}k0$**+GetAidWeR_EgBJDTr@Kxo|q7(6qa@VyHv~%HY>32`TUjj_((z} zniWU6cY=x@TGE|SA>ksA*Ff0qmBW4Ja0Wsn9 z5luO4DFVB&AwB@VR!fWPC+^p?RX;YTfpUcFC2mVqSHGEq^Fb5IKS7R){wHJFxVu~I zYc9zkTjDTzfepy-(C}Ojfvh5c#^eF<@C){w5z$-U6xEud?afWxfM4dzVMH9-jag80 zOIUR6fMrfqbU7Cul8<+Y^2tOB5i zi6{~Rpuf!V&tdnqI>)0+mmBy`NRf7m9H zdc%psSZ|MT6p$#KAzF<+%-qdBl^wqc?|EK(xVAQ;=_gpVG;NJOGf!dlY|a#8s`CIe z%fm#0e6F!M0z)Ki*pP)nvh7F!UtU(NX}rBIvT<5 z+mImfEEle^SKKt1tj>+&qg#eNNYS=YG(vF#3`wL91k_=FY13xoX_nc^q+0Nsvw=BU z;8Ni#;|UOKdsOf}vtCV#(6@|+tji`!4i$myCWwHW(7Optt7|wg@{i<|iU-T0XXd=O zVI-yUAnX)2cvjHR0-fR=sVd2o1nea*3$0NnC+l;|^aE>Xdc_(l#VkyOd!+`CJHI-M zye3cxJeivTK%NpSIX;L(r@a!+pnFQ92>%Q&!;5M z)0r7WGPoDT`kURF(I@lS1N3gMQ=~i1FP>4P`euc^qqedNR=6NotR0bFqNndC`}IDd zj4bFV0mekft3~G`9M;|wZNQ=_>5OlaxuA^Y0WMzMH;&h~GT}%-BT^^)9H=WGLhIwu 
z9zmKXX@v>^EN96jw5mfQphm%qR1T$b<86JdIC_x`7?E@o_mwjp)0=BvvP34*0#NsLYn+AP`uoI8X8$SB^5<57 z!U)J%Z|I4p0NM}i>S8l>wORdF7m4;8IN?yMT}Dd&rO^Jw0gwIGz3@32G?lLqutQx` zcQ0z~`T?rK*~8_*i6NKAVfziir7YS6s|w4)vcCq9>gDn@h(~^@8sWle_`yghc-4oz zh(riCXM-LccU+CDcOU`W{5c{LAC!J|=dsb&Z9GkBn7$N_Ltxc+|PKQ|`93(WTYR>#KvOP>S%eFw}VIkq=_h?LVb`AC^ zcaX0!%GGguQz5!zfNmYv$lm{5?oeBRyo?c;HdK9an=*`rMn57+_9=qn}70^XCYY6Rf2s``G07|Pjc|K?5Pu3m9! zNRTU(_vpB)lDofO+x!8~o9p!;F0!7J2~!)oP>;6%nIa9f4t^1i?yVnr?z%`gTSN)p z;d&_JjrV3}R}xT77NgDn9=>R z1;|O6)B_f(;<*toH)rLUT5QfRD*I|I!L`_qCwe)^?pHo|V@No{S4%|>k+8!hi@^;U z^|sZ4R^DkH9Jj^(z6xAG9RR=fk3pdp?CJDJit(G+F{ohcPx3B4JDjqZkesNEcDg>mw}=6?VmMP zr(-b?+De)}U(6HI-b+Sv|9(l%^m$mCeym7~o=w_w&OY*>Ip=B7zOT=wxjAB5XxkjY zmG+ti47g+nA|nkbi=rl{0w43W2t7P}r1r~>=DP=+%@mg-=0JlG%h%*Enp>_I(!4ja z(pW}JiqUk+YEoLnuDQ^lGEWz0jrNgRXdbOIrXpLKYJwh3BZnYXMlG^xaz!U6SFB9mbKA#*Q_M`DoEGMp_CmsYZK_L)0QP!2dS?wH&Q~DVKZ{z@ki@ zDU*%{U$bcMf89Ma8LfTS7t+)>(V=f9V^O}A_PyTSlVXK1NoTEUIyBciRhsi!1|iodbGzo8O`y^&7%EWm@I?VD*8uJ=+S}Doi=P}zbLC_+LhxhkKyz>>?KO+$Z!0;eIbE7Xzc`dg(#_+wCEa=l;;|f1kGV4Wi!bO(!4`cM^;+*w3#lc zaVw*3D+DG9nzmkz1~n5UIgr_&tIeW0Yb2yKs}*U?8fnW)eGEF2yf~f)L4#~vtd0vX zDb05$WpFZUZ6->Kv(u3rOVZ$K1hudj8tYxP^NDceW}+G)OVg;>q!@|No~j&ZPIXFB zY93W$1+61TG}=bhffhNLX4wRq1Cz2cSuNT_{aBIKP%TPhsMDh@REwg~NDjlAG8Kf> z)R_t(CGA0lkdo%$^>{iht>070sbo}A7KB#P(%{o6sbu?%pm{{2tE+H*? 
z^!U7r9i2OV7HxY|$=GokXgVA@Qqr6)gVxM36Q@W4J{pabv`9_-ApkZ#j?+MMtg#rJ zj8!$7Z`>$gz{jF^W0ab%HPGTl0RtvxN)7IHeWXi*szq~4;2O=ew3e!xMN^Fe23XY8 zk&G7giV=_MXdT%u&DquGAGStF{~+qnktLt zUNo2_Co?rJh9qrWe4rLBPS;h?o{P?1jrI>=>z9zmKL^@=gsjAmunguvdmU=xFr?<* zkrdVVRB}QZyNtATC?QR~Be8-89Xh1uKy#XpLHmiB;5DHnD};4OB02AJS0<;_p5dZ7 z%Sh{xlr(0m($>XMSsvE~}a(%^&0kuci-TlEa`V7^1ZIU~})_bRmSS z%W}!8`aIS_b1Bc%o%l-hF;Ld*d!i<$%d&$2WgU_YCYrvEDVkoMmFn>dvAoXVrM z&kHMf1w!hXOAfT?k7_jNQ!_oqrK5Nd(JrNRm-AVq%s>X};5y?ZfP9bl+t~THI?7+S^R>(cFF>?e#TR z4ki~G>kCPPwhFrDK+|b}>T}SdK2CN?ga$v7_VfuDU};Vp?L$;Z8Z?igOG219^1>8U zmAo!FSz3$_#&^(OoKkafKH9@Z^M>!if}coJSYR4x5n`Ams6y8z2Q=4o{CgjR=5Nya zO;xpM{5sU4?Q8llAvpjgCGEXOU7yHPlsstWV2ifynj}r#jBYZ}oZDP;p?&3)Hf5)v z?MKMSoCEMfmiAu20QpmvO{|HU8-vy^U_j{d0Sel>fB}!DQ))$PMztlpIV_n&V4KAofAP zfLVp8OAbiQqA8{^Cx~i7XmBZNigcuA5it+i>#XMHk`ilbHCbBB=EsW6tdaxSR9$nS zIkh6~qpYlg)=|I!OiomzvLa-}+FVi+1q=wuOY@Nw4P~V99Jn;o9Ed20 zRMUeW%yeiDJ~Z0D2mu2kl7mf8zyMBWI(R0C%AB%nO4OV_1egOf8hioIi`v&x2>;F5ooTDT{F~aO(0V>zS>rEQrcoAHPv4dqj=!RNn%rb zIzbAjF+Q}8{~?YaU*mJ+q*{zHZWPd8%rqA(Yf@9wUejE{mkfU)fjKpWnW;tiF4U0? z!jI6w--`1wWL=YL{(hQQB`4PUVUpixZQnP24YlkioJ{nH1|8I%yL0!x z9eQsni&C_1Z_F8>ZEIr~%(Tf`?8@qg_IiNk+7r>7ozt3K5z4aZ$!6ut9jTE`J1ug9 z?D8TYn`8gQny+88d0lc8?U<(PnHCqLm=)FFu20t`J&LHIompm4mwMJ*33Mc-@U+$f zjnxNetFD>zYO3frqk0FbW3%cgs;NFb*L-LnT_K}a6GzdRnTe9EV|0$bQKP?zu`-x! zFh;K~i<1>-E;^>sSXq1MnvjK2V4F%dhZungAuo-h0IfsNH*MjwT7(V4PJ7Tfnanf? 
z-P8WL_j8SG{2cCcP1EPCc?=b;5}VJVBQUvh0vb#4K2elx?0lV15Sb}LV_BT09Yu@I zp>s?TYR`Fd&aL*}Rr7C!Yp5eze}6KH+4w(g_2KvQa5f)Ld*7MnzGKTyuX}6u{z!~s zy{UJ^bR@>IqXm9(H987XD&23|fh==yt z6SE3TDr8}rtHPnStRyG%!6*i3OwCV6Vr^9uk{pG+tWMQbdqdYHsTQ?kNK;KvgW3{% zAcfNo0c=u{LtxW9cJ`qN6u`VKD2;w154t zGqUyTOLhow2+;QR)7O>cK(wf_ERLcfd`$qNz1Qxw;yKsSUCVY&z?WSsX4{bLWSVxW z7G28*uMb3fuCL}?E80I_nSbkN{!mA@eV#f5DvsnR*6Fv1%?AH{mhm+wtiAI#cTD^H z0Ml7rV{~rN-8}@He}D6}!L)8(naOZhQ`Q6&*(EszXxcn4nR#fj*)@hhd(E9`e@#s_ zG0mBqnX)p>6g3t2T-h=!%Icg)F96GT8^ zwRVhY?6{H`#Y5TFadt_LZ0b0(umM_h3`5T`rD)DE#Pz|vMq_1D^WU`ojS%yin#LcR zn9R)SBJe$_5Jc0D51+sQa?&WOYEm2QF$MHYbDzmjr#*qr0L|TVbqGw5#jzUuYTEj~ z`c`scOHGak`nGcj^qbB3URH;iHlgd91={y|Z>;9kxL5WPA0ESrlW8!Un`nx z?3$g<2W!s+W;HjT*_=BCXuZuEt1a(`LqKsg<;7&=0u8dFBirK;24i8G{cI|V$p3?$xNn0Ak(1RtUb5REhVu`djkHtaWly))A|{s+P-RJyS`>Js>OGR z$p&BBdo5sHib-P?2+hj9!gmcxkoy3w*_EOs$ZG2th^8*1H|QRpYi!Ov^MkeDDA>I- zLA0I_1Zdp4QE=Ke*0kStM0>3tj%$UyoIcxKS0-Mz(F_qNt)Rt8-Ee`Ox zGLo7a0*VPSbs+1tQEUud*Mw+~Av^xnsr4#H@lNt8fmOH*RdVC%E70JoZdOeQMa7Yv z=2qXUI@Yh6BBrLbLNr!YObIZnW~OTn@VPQAsuIBDl#!h*n-``*)mDw&X|BV`M8zPi zs=9pqqQghA58bf`dOLp2;O(8W~ zgi1lEvSYOeRf0OrL53AbYTD0g%0SIK{gj_7p7-fP)2Ffk7N265rI{e>YVc{N0PQ`6 za8I4TwPUVIj2uxRuh%+kL@8%HDy}M z#veJJiOH$J>?l4>zau%G7C&MLD3n7J{_cBFbN>aJXBkCDUYlyFK$3&C_QoEW*W*)< zm&8Q0_-W9sjhvol)x0q6d7PQ_5ODq-9~2zDu?gyqp{kOa#+RmTbDDOXAg<|=E%rl! z1~cp0YYqX$WN|W|=K8rg6YwbdA9KXC<*4zR9FN7I7CB;^%m5AY%v6$UkEiB165#g5 zAXYIaM5aSE=FPO#JT$dhTu_7CX%CwytdbG4xt*1jVFcdxy2jS3;AEjmNlxRo4^36U ziD*$nHKkvq0n3&{a)NTS;9qv#BPCrg2}4 zod)AIA*igq2+A}U+r#FtGXxZk=O)u>3*#j}8pQ}GFc>QnVU}4L4AUT#nuzvb)>paxyUwCCcy9tzN$i<2VoRr7}tt)KZ7p-Ej62W$Hf9EEz* zhl&eF3Zt*4$_9C(SfD?y|LWtOAWSij8cWsU~%3K}b!fMTc27c?e({ z%K|CltL7yk8gv@+&8In+o7rjK{9}c>q2eibs3Y699H43UMu9choJ}=hiU>@5-CRFs zs_DTLSVwA_7f^JvOol+M8L}11RI+OOSUoMW+{_4teifo-#wyStV-W4J0_1oD(S9K{ zT?$TST~+hd*qcqfo64df+Aaab$4Z8-rmJ8W7efHoW?i$wri{*wW>OH-Wl=3cm>~esAZ$;;X%0eMlFIypSo2m? 
zRyHY(X6igBLzm)#lYOqX4?b031!(cX_MUr}RKWh|n!9R(EXCAFP&@dZWCufSJy}x+ zW7KdIuM-a7-aCq!Wiukg@wjKC#HImSdz6=)AR1!?9uyBl6Qd?X z+pDQ;(KCt$9paiGAlh^15S{j$=3PLcxg@A+-2@&KpvJOocdVv;&5XP@E$+((o1?fQ z+Uv*7HAH6OD6aP0oE4q|tvQYPQPkvMZJCp)<*82$1SSmD{w)-@fNl`QI^}w+b6eT$XFbWJ}P7ndw zTLPTs)}#2Wxhk{@w8jWT$i|wsm87Q0!J4YoV#vy5Is`h3ezQT91QY~lPbUngIekTz z$;*??N)~7yvxx%=wQjv870Mc4irLolX}Tdmi;81xvkJ6-uS0#Lr}=x4lT8z%HAYcQ8TOG_XTlo4wy&BbmC1CPz8A7EiaD!! z#rIr;?`h_|Cd7wo?-78otc)Vm+c|4v6d^SYc19osk(n+`dp)O0y2f1XJ+nDyT4x_C!?4Eeu_yu>MK-|nx!O(w z*aVuCVgRf9bmS!=nlcBYB&bE^WK#$8G{_sQJ(eB{y$r@jk3A0R`4{XO&GHkV3uFA}3RYC{TlYDF)EAw*{=`7Q;Rl0g9R! z1xRwRi1ydSOi-q|&Pq}QIEsg?HHScDDLC?ySYwr$!WYw4qj#Ei6tk*kXmT>wtgcVi zq5|V&ljc*PLA9!RY1&h%*jzK4p5}Bjr|XiL_7kJ%NUGC(Q%R6iz?auYQr1H?B?g$X z%OXHg6Gu^5vvD;=Oo+BoYO0Bv9Zu8e5StxYpv4KYsH?#^L!V7WV587n)^3qfiYn@pfg+Ht@ zHZ!>)P$}eQW|uW5>QkVtfB^$cn8YagrB=LW>bj)yLAH zC$woy>ynP5&p-(?2pLV`=laNl24N*JGa>*?N&*m3($O4*0gw+C z5*(cB+voifq(RarJsj+J%f1Vi$w5PfvAOpd93JtxhQ5$5jsb)S^}XogLTW%>)cF2> zHTrW7N#zn%dbF#*=nkWE?n#uzfph(R4&(pZXJBX$s8j!Shmpo&0mkR}y6;i-pyFU` z>+-^dhr$BCLVp+Ex_*jqsYo1E-ADKA{#s%fV5m!vSM7(&{|-J_;E-?tUwXjTy^u5< z>Kb8u%}*V{LZI-z?Hp8HsQMD50fWce?x`m%L?XQFxHa;_z%p^Iy>9&z3LuZz{31Vm zhJlO0``8ct#cqBpp&%6e>wa~v-R`Oa!~p_VdN}7X_G`%d@ks>;4KYd&?SHH8xq)#2 zfvT`s8#0WM(y-6~IVybaUAtwE2M7ZOtkT23<5Slz##d$(I7A*)Z})cB>g!lAIOvle z+^gz(3c+W``kz85~*>4 z>v0{jbM^kowmz?ZH8ULv+sFd;O7vfrY!L>i-`N77P#`zQ?c5IcHsWC}0#s_PFsWJ2)bc7}dSg zci-g~aU?9flO8d;Ul-wHXiN;con3UUH$Hz21`ZB((nG%){_B4)MudXO1U`L!f4uQQ z2?K?~?!NG;u7k?ZSaA539$NbjWwjBr)o-7+w6@sAqczjf~aAqETy`yBr+e*r>*!$SC6d7%VyaNrh3ZPoby zTp1h(2hP&t-@Zk`L-HHpy0;x4H!~;)3wP1qI{2W|?#jUeLDcA$SO59lt4t0Mpho!b zo?j9YqzFCk!?jEz1@B_R=r2kP(qlO??B_A+@3TM*BnW-A_qkKZj=_*XdDyKSSud5# zq8-%e-u&!1S}F`n^yggfa}KG2KsCm12qScL@u6_A0Qar;JA@9oCk_?@hSI~Gx{DCD zV=@ro?mKOT451!0ERs6@ZK&H2zDYs?Ge6(Quj~CTjD@6~^!yLH1&PDsJ?RmdRoy># zNVrVmi^>}R`z^-=1_{GE_ILjG+&(x#AV6^Khuz!m6fPPKlNclPjm=N_A;W-l0K3_v zYjqSjBs@EGyU)(;bBv4Qz*Ty@>$eTp^}B9`fZ^~D_FD*HZ0AlZm5CH%^oRWaV0=Ut 
zD%=q|*Zzn&G%$=CeIYX(4h9$bxsC9_h=eiuerEV$G^j`p2i0+8w{ZU_5C(>Og|Xkh zi$ikYjgH+qByflvyhnZi+&y;(8x{X1C6NC+jg+^SsOAiP_X~c@R{L*{T#%S z(0J-x-^Ta#j}?N41Ai1S;P-KK@jEiGSa2QVTj$jn*)60ALC1~nx#hSpx`(hRFzAo& zG`>g>VNAXwblmLi@LvJq@YsF&3Ku@J_Qc@8077-B%R8_TnRrLo&)fZAe2YgzLN8p_ zx6J-Jx=Y2O;#_*f-{TJ4frOy~;d^hN?zYFbkbpq2EG8H-YrogfL2ocD#C6S>ED}`vMUEVIb@!66KoDDc=yJ#R)ysn5K%?ria}l<_ z$w4FGUEGb&MRunzDVRX?Mb@Y5U}WZoWWfMCzV>$=*SXk0U?7Ol-*aS-x=Ub*IJB~R z@AYLNDSYp~r_g)-B@N3$Y{yUg;YMy}V1P(`qsGYXZoMxG7RfhM=-jYzc{sEq_jfNl z$NPXxAnYFWhOOMM9gK^l+>SXu2t(x&znh(Ne9$o}4F~(%eeT7J0YSoBjj&(Uw-quZ zNDgj=?_s;=z08cpW#TUG)5!cT@-Gx91s$JbEA*#MB?bwUUg_Zs`8mU-ATSv8#dj|< zJ5VB#h92Yd$C1SW!p4XFAVYu7Awq$I1gi9SUf7YBAL~9 zAnUf9?GK zoF*(93wM9~p1xmx2#!Vrw~Oi+zV-K_LP6uw4Sv5fg|+Rg%LjecyUw|)*)=~ z2j4yQNyTBIpZ5EIuA|3?Wdiw@9#OlWVO$4B0|I2_=+ExpA22jHqJ!{Xm%w7Mpbv7? zUguDsN)8dpVeEGgzVC8mV1VHxe9mKE7#9ZK7nh-Py>K)ru#+CIQ5(KS7=PWtpt4Yg z{;*%2yMGN02M5dznVOR%T;4pZo@Aq;;hDA!y=lVf^`}}_qE(er$f4%)A0~CY){@wq_+qw-5 zNn@e(=!Oo)*9*0~5hf7HgVMvfcE5e!F?OUx7zn=7V>QNAb$`D494Hb>N2t52&%n`O zpip`|hU(zPUPy3|K)Cy>4CB*&%$G*Ng6=l#_uM@*zmH51Ain!xRQE}P^!SF*Z9j-m zF|g=D7k&42)y0DW3qy3?s}DZGLOOVc?EjW=sZ1PH@3(H9V@8l1lGc52-|-?mCKBGr zj$QpBj06h?kA_g4V{dJBkf8x`;pYz{gzf4cgiHnuVw4{G8+|)+d~g3?5kZI$Ds#tg zsL>KBDCmW-A>8=XL4_2?gRu+!dHF3v=MZ@$LSLT`Pbp<{LG?jAQpLb$w^9`Rx9f8B17 z5h1wfgAl$y=Lq=?DvtyAFt+RCb@wiofrW>>u_0vP!LSj=_g=eqan-@WAz_B@Zku`c za?nr^)m?<^{hVJ9TpE>i%?}wm7oWN~F_2K){qa+{yB=Xj2pE!62bDec!>8~q5F|(* zLUvYvu3xu>Kq7chdN_@&PPrWgmxj9Zh}@4L!XS8ut~vT@RDS{j$A#UYyXe+kbj+Ys zBu}MBRBq@vF0+dX4HClpes25wfU!96hWy%B*?EMOMnfe!7+2N#Gh~HS9F)khoqp7J z?Ix54!!?AeZQSf zal1l&{ihIyLsBO_-d}Xjx%du3XGji4jPJe29eG$9+R~%H{b8$PFb*i{q{nmUT>Uwz zK|$fw7k=Dt=8hMs@?d`NwePQUP9YM=rC(Io4A&WUW8xT4elJ3Ox)@k!c)xYH?gtPY zB(2ipMF<^ihZBiG0(-yb);)JXaA9mqj|Q7HZojzd{SgVl;ZS0@5B7V9&mba2;7bqh z=Y}ypg9eBrs`|PJU3{$kh%gi|eDO6peeqc@2?~#O&Y$nU*!(+jXuVe_e_k zKsrhf?tiagvfMOC}Nq zKk31{Z$eoVEbR8*hR*w|4HXN+?ellc{u{X$A`nJv)c4-)4j~dog#Wqs#;6OUJ0LJ3 
zjvsWYFG6+kT?!l)?9wCZTaW!>eD*g`nH2nk%^yv8edfzLI&KoF{3Uu1stKI*-jRS&)*uO8XW0)uu#Q5U!ySJ+! z5f~ho$M0j*=kJ-L0Y#EHe}DMws&C)KqTuj;&-HGzy1T>zIdI(_yEz^VC_o~N>*`b+ z7?y(=pNs3>?TfmLhJ^z=*b3V@@^fB5IMD96Fuql1Z_5DWAjbFlHL|k14>Uj=99`l2 z@Ap=3SWsvfhem@gJB6ye&-@UA1<69@@8{MdT)0p$AlX%3cYpo#c_VUZX!tm;z8g4TxV)1d z4Z@G^ejRM*lgEL9Ke({h-JwS4V6Z&)vF^Kd5i&=Kqv5gq+;gapf7=Tl69aeM>aY56 z-Lr&YVbLr-TD#q4e-QR}yig1({O=PfZNE> z)%Ew=H_#w8po_}P>ssN10}2NeX6fO)kRf!2k3UWn7>n1>J4W?=WoA62#Xm_s{=;`3 zVS>c*R(;>&Ll_7a?jFN__!G)Xt@%mGS;}Y4G9&bIm?v2s)LyAI#;i2?^ zuHVz`=G2FBbPfG|p^HQq2p*-!zxHj&Z&?>2C@dWCNsks^bk4mMCJPM? z-NpBGH%9IS!{wsPFhW&+y>HNX7(A684n}@Xm7n?dazP-7FFiCmcJAG}#(f9|4FulJ z?BBer3I+}Y?fALhUw@&*V8URJaU=9aL?Pit>EZk_e0>X9A39tZ8r1cLo5*I*z~ z(0%CZqI=D5anKlWmmd7?ukqcBFFM8GLD45Y+KbRNM|OR8kl;vwPI^4vca6XOxVxc2 zaM18EJ8psn2ZO5gXx{tm>--H{;b4K`qx8_nP8mLR2aHF-KYp%{UpYd{Lc?R5xt;2} z_}l@BR9YRjdlyNAf?Mg);==7+$Im%lG!#r8?y@(#yY~zjjfUsaqd{-zl>Z>1a&b|5 zsEaSU=PvTQqfiti%KiHF+xNThkP={&9?d`YZ@bCE!4Oq?@Qd!Edkft$_5mLgkN6g*xC7W49LRq;ET@v+uhzrWwFrP z`ud?$xQ@|4VEL6E9ru01-4_{lh2a8mtNYKH-8+Ru7!DhyN7MI|dA;GrchCU2#Q0wQ zsllRhi7z=_j;|V{tM-C|gYuyCXm8*1=N>|JtymTe+wb6m&^fp|kuWSAOAn`zSNXY# z!$I;6&^_aEFo7yP+DCuN{OiAwQ5k@EGe_lLZ=M)f7CK6gbqC>t?G`CA!0|P% z^XLqhF*zu3Dm~i9_x^ny!{r9UVbSO~Hm`g8umOQAAe0`hHU4|&-`7x*fKZ7nJ=VpJ z{oLc#w+9B30mxEhcdoq|(vV ze)qo7^?GHZ;MkWQt)YMc>Q5o;kC9<~i-iG%7p2GZg$n&O^ruU(K+xDHJ=!vi`ntC} zc7Z{~q3`MnpTE5#VL*t`HOGz~9c+V3!{J5g(RRJ#Ha|NzDi;LE(xbV;_Z+W!e6QeO zz_BhpnuAW^M_-2U8y1Fx>+alh9vwP_goH+H>Cybb_xc$A8 zWH51fFFhK3+)thL{j-AOVE|Wpya=6pj@4ZwJQzqQc$OZ`JpluF_jPwNkAa5XfbrUwX9I+U#_@<2$%G5I8738jKD? 
z=vKGiz~cdVD?Qp{r;($3FGdO&k#6a+4L35&tiPHBjWhjUO!)7AKAQVS`|E?rd0CqK zhbWExWu>YAl+8$s?jJEL?KwrwjQ>LWe)&?;dO}D^<9$7OFU-lbl8m-wwP0e#M z(xK@U`6vH1N!nw+J{E@hkfr%%L zGeBiMPz#Hi5SNq;G^ihGPA?g0AHUH&{^TmgA80UsCLwcrP9`B!Wi8ZAr?Y4fepW3U ze9fe^fB$IyzWUE!Qq!2!$Dqac0tSRM?<1&C#o*n~h(?ADu=|H~zi?kb(4PCO^6$U- zJWNXChlfSm_?}6F?><4Ad*@Kv`!zS0uY0wTl8FN$B@<&!4nl*^o#UA_=f1h7h34Jv z&XJO)9osdbN`u{#(p>wbz4PQ`jn>SUgtTSPEQ(=S2Xdi7c1C;bljhh343LyaUmp!M z5j3_Ara@CrXIl1y@kn!+Wzd{zG}cC2^`<%t$b$ye4J3w|oJw=*5euWHgVynyG>-1G zQ_wUzhYCgCEEdg0hu#{SG={zqnP~wdEiFQy;AS%s+JoMamFA%T^G5UMn<;7iywUi% zfB_vjMcQ(*M~m;$;B#wI4Qulu$lkdeXzu*O!Q7%6jh#bWRW&>lhtc3=VX#Si&K)Bi z?Y}=4Li7J_X#7-T(zmn-zX#-dTLjIUDrk(MEJ#~#S#1iMdK0FYOknKyHt%DrB;oWs{i}@d-6Q;yyVTj=RNnF&pG$p&xvI3B zr*Zb==8?2YUUyO*tOhVa*pNV?D zs_w5>7Amu=-)_T)gFWt6`Fkd3(WbteN`kma(mzO{7g30+jnjK3DYTHh#iT8`pl`ll zInp3IDd@TIucO6H{WfD3_D$Gr4*YyJ#mZ-sC13XO|4MZq&r_w^#XyZ8&SQ1?>ap_# z*jP;FA}WXic84s3mA$u#*k(aNwNx^D0dBhZh{)9ZrxOw1V-E_L0_kwWpGC_ooJQg9 zsB2C3xP*%nY1x+el^;b}A@JI=8XqTWRCX$;>S;2xenBX1+>=wV-nulQE2`UbQI8ed zaw7UH^5xWVKBb^eQbhHI`{>+QhC()TVT#*H7V~_+#(WK8ew%a@vdi_c5*;v(IeD&u zrS5~#76HW*)y=Xm+=-0P1t=8zP&SgADczaLFl*v;G`A*LVuDDC3oKqU zZUTPIXl+V5Y4R$o0JCH>SB+)wzF$Gf2 zk25w{caH`F5#Hl2Py}gG$Ua(4On97CXyl9z7%?&MP+C+Vx*K3xNc~xsWh<-DbD99u zA>|Y3h;Ug2?U_%!C?;W^_S~sNeB$q{yQlJEs0JW zy@+&`uutS+X=zu56A{UE`Q;Ua3##ULbBBtj!~f@KMcMTAJ-#mLK7`NpIQQypTAF?P z?(k4bebH&Ya+ix_M+rx@zuVz=nQ0{HZfbAJI(3A4zS#wzhU|A7(#uPcKXv#RdBM`TQ`bG(==bsmT$u)79t^I_c8cN7CM4oE~v~z7n~1;`p7aT-Dd_ zi-DutzEP%=YQX+`7G6;bMx*q=y|2>Ju2Fz6+)&^|t%d z{+TqEJnk&w;)j{6lg;{7UFXGiwGD;XRMn71cy25Sb=>M*cbJ5kR-eGwX<`4mBR;irt~zH@yuZ;64@|(goScSy(GC17uOdnk z_xtapLg)NeQeXLGSGs_oRPe7mq-4YEB_&s9xs`?kyLaUNvy8Y9jgC$FmHhQJkyDu_ zM14KjZ|CU4TpR@ur*?I@T&Mv(dnGB^q62ob;jHY|wih$yE-gq;eONgpee3l>Z~BoqqY>)MK-<6OoB1eUJ0YLH zS)s8T0L4lDHw-JPEb=gj^grM2zsEy1fo=5&jk5Sa@-k^!D@+-?UzF-U*+%-^9z*b|KwyCm$q6-P!%a9?2CD zE6>V)*<+nZrzHH>D*Q)23E9xvgpyl1g|1h4b@z32G0Fyo!M32zvSeU6bAz3 zzJEny6LdFP^nOoW%ke;9(4(p$m&35@+!|gr*(8 
ziL1|C&-XlfS)SE&z@@$Y)lBo_^9inw+Atz_vn3#K+4;92%gGA)vU;FrjQ-TWqrF4ij)>L-X3_P?^pVkcWblve25?#+Yx(rIW$q?phYP-_7 z7+M+S5wdA(2wuLph&T=-MXiyPKmN@iOAVWRIUXCa%mOlv$-Ouz92Z zB+)yEf8yo2egI_;**dO8WxE?nC$&1!vl?)uOUcNa9&H_X#cY%o6U(s{a=@Y2I;ABW z)gO>>leem6WgHb(eqgH;cyM(pgRw0e`rdjp5YCMI*7O^q{%tR{o2i3B7xbk>=+{H9 zc~Any?%UJnt)IA8R14&VKRszlp48$fWi*qdjc>d>x|;nk@6D>@?bpiCy82iymKQE| z7_P42z)i!7U2UH~kC>8quSUgCpnlM4o!nFT&A(zJoG%ebYa69UF#qd}2a;6r`n$*c zD%RV@z2BH-3+-$lX-CV$~b|_TPd*#aqE(Ug{oxiPXs%^Hc!KE8NH1++wooyz;}-(2j! zpLBD3-Wf?e(Z0|Il|SlGarMu*aqq)9IhG_ZcTX$^k)obq_8O)>w5X<#)6i#bAMn0i zB{6^b#&t^G8ScSR#$;pZqj z@HnsS9PC27O1?EY%FFqE5|M^Ub{v+()X}pI^#b!elxcaI0@98l4bTajP~Q0pP9u-0 zxn39;X4cbLE4@;k|AJ_relFkCVR79`NzY+e+9{+U_DTI9V&-7ziA; zo_w9rrNy@;`872+1$%s2scI+SS^wB$b~ zPRW(&K8`LDt)7p)0T~s2Bym(^+Pgh69ar0uKBDrl{Z{T3UzlbFe<$bLFuwix7~)tv z|8O+=cL$51PL)HjlFotu$}sBN$(KXrfV(Sy?4%g7e+K;XUr2XwW`e%hs5|Kx=!N+# z{hf3W7c^mi8sg9yK+TYv1SA(GdIAGz$tjI=(qA(jyo;fr8>|v|9JT2PsOfTV+$RS{ zNpiZ@>*Z^mghnr+X@lSK(j8%kh;n&ro-QhZzqPbZHk=JVgG;qg&EJ}XidG`#Nj_Xf zRA^i6Xt$+N zdl{liZQ($s+v!{Lf)~eX>&2>%ZjV#*SH_MDd+lDLsO0IG`xg&y__9Vf{5v~pCfj$u z^+jrKgd7UMy&X+dR2J?oa+uRl2+~#Q>(~vGI1y>=uDy6i<{b1M(j0Q|fs*fEtmDVE zx2HD$Y7S%qHp7Q-YdtGpnFmwNM9B#sU@9+6L^IQW%v_&K|EH#DX7hXDAWSawnpK-h zkhcDMuGPtpu`7-Se6k$ka8VpBdh$c%N7qeyH+3>`3}xpk51HDEXhxL3p{vhKLE zumUt&zW7T?M>aZmz7j{vB}*C=%Zv-G02CtYkBA?~siot-JE;6b-lCR;fmZyBm-^5%_wsUkkytm|LUp-)Wr%K1mxi0JX zJ4C|G*tz`P$w0k$Czw za8ha8Ccngpib?Qbd>?k=SnGE2%=cDnpS7bK)Yh2sY24(70L+Aa{l9L?(%$PpX>&ot zCJ|fzg_{?5p`>{MyTmJZZZ7zJI1MHj>ZYtD>Bv^S?%TO;1*DQ@l9JDdKic-bsxY=O z;Dw!c{Je;XC#5+>w;m{m$QkFwk-4abAAM*eczS0V%)GtQkDy=aNJd37@f-{Vs5Bd- zQL0|*9FkH}>U`0P|2o-h=E&smtM5rzd^!saEA;qncnVuQJZS#M>uakJyI8Gf?f_n_ z_&k3pMyFqKKNuc}uD#(08ESQ-xB0&~3g0d^PG3kD>f9zz8~P}HXXo!EpSF!=liRme zhH%vc0s|K(McNS0&0kK?TR*Hni=148t1>4d)Gc5 zh29ywzkP2*D>_EYFO7bM83LnQGUWa5GKBjEDat^4g}ciDCA%UtHWl?w+C0yD_wdd; z{agqsecN=*MMZZqqSq1OEx@N2IO$}W$_ukuINpmmTqrGd3X*RyV!T-3(BPz|12F_t zQ3ekA`+SI4i~T0Ugbc3jYelR)X|3-i(TY;+q!eiENPF$1spWnArJX8J>j7*>T;2Kb 
z@%Gp9m?m(#k&9I@H08ZaV@SY+06oWO$n=KR%50eZ6G|>lUO|R~mMg7|ETWC`mK!-+ zDYD5w8+9S|A@5=Gbn8Qpp6(pJzhdEKN_h55G4#-kkCQ*=H!qt;+tptM@*ZL#?SSo{ zf*^{RVG@d+R^J1vhqJplP1&Xla=PcR0M&pG-Y}Tj@L$2s)a#A|1p|8Hl&R()4(tpx zOUr*5L^h`cBRl9;Lk?*8X;T8j?Jq_fS2bOj{|p`Q^Bi<{0Hqi@-Qj-T9MlZyHT%-f zJi}|3G!5Jul0UqD9maQufk~3Kv^C^M0jFgFUw7Tr$z$UlvUed4Pv+~#^W#ZJZuuPo z!PlPrdL#_z67bVfgoZxJ;w$He8|6JYI(F*!nQz$!DL3Tg#ImVE18$Z-JIv)*8jEOf z+`PTh6=H_7r7EaOc-Zz~0_tST&e%J<&9h1Zc3LkxjpP=&<>?=o+=nVFbvS#ew-S&= zoY?nFC*p3YOLrVOZH!{d?o@Mo(q>49e8wF(M{=Uc`_v(L;m?Z zVpV1{b)qb?437wIt2tOd=uAH3;%1;}J>5xVl)O_Q7^Nv_JR#F*<$qU*G&T0Z=40gb z4H+4wrGMv&s-u%*L*LWiPcqGzimUWr@A+QaUTCW;X__5R+)A6~a`iqVG z^FDl}T8D1pJ~dHFGUUu({k1La$JavT zHWQ?!M2ErJ=V||zR?ZOo^Z--rTCk2@uU4OL{DH$>Y?&62j#oRNn)liKS<8?YN9`9* zZx^)E@%*I{==@LnL7=^tKQGLH*XSzz%C0+{j*?zN`yyB%;B-i)T()d-yty>_aP)we zft15xca4urW$5nxMP)%Mj>EX7#l^)}a)MA!8*sY!B67`MijJn^(@&jweNvZiZ%^HR zGq~J>+88vw&kMI38fNajh=j6tOp64!{2L1J7WD3?6tJ!BHq#8+KfLUwFa+ptD z*(YRXHaRY?M%+(JB_T_=cz0*eA@J&(HmU1c;F?Bjv&&OgGj8FDLnVAy7}hM)vMq3a z{-;jrrIL*q@9Pk&gSH#aXftphnliV@eIWBkKwea$ww(myZ1e_y4wlfdOpD*-8t zC*)efP7Y^ijyspQ8bVvFD2dkb-gn-l3 z$0J4+VS0**iSnU{h`(Q}XtbicFAh5H*pX3=;C=Y~R{x!e-Mho1i*PX!&Ex5|ukj%1V*@UH)V5 zuhi}`RMMnOi=60rUGMyUtHa4@^R2ZbW21X0KA&`C4cNsC)VYpN_WSnji=EXLu z`Pjy)KX2=XZ<tpL z1l9(V{XRb5`#Sb%zFdlqKR03pvDW>YCW4Dfl=^4!;7uA0SuX8z>!+XZWd@DEn>X<< zYK6ak^_n_2mVAD?&&M-Rx+A`EMF=dPgA^JXjnnE0dANAoX+XokV8r<3N$S*}I3nlC zt{*(Bd)@O9I<#uszHgH|FBB`)ZrTF z{Gg)3pGF+Fcj)NH@FRwnAuT$tZvXhKA|gg>(##IE^Y=qwcqTc9?4VO4cIu2p)ROZw z%rDH}suflS{i^;nh2HS{G$I(Lzdd%BgO-BjVrNEOKhUdPoD$?R+bZDs_4hSt7Adjc zZ>g@sh3nP3b)O=a}wqfaCD z|0Lwc3ueRMVT~58SBI*yRrrkOI$^_8KYO33u`~S!ioxa?RyRn-Tu2;(n@(Q)zWT5? 
zLG1MLsBIi|)qN??gT8HnTmm92NTZ zWwyKvFZH~(mwOHj2YNtu!LTcxw#jnEQ$zBos8atwx?w+WAo&SoD))CL8dv_+^~?CY zSG}l5Fp5!;-E-N)Z-lQ<(8n7dKCbQ9<1X*(jr#Us;d0{Zzd1@Vat{AFhxC8JVT_#A zls}$V0JM!mVW6muvI>>o6Pqj!&${^3Jz4i*g=B=dGE^)K*yA(W+j4|qW| zI=jRrs>9c%O`L+?wwFWv_nc!7i8CQh{&%|ONMcChpP~|)0+H8zbR<+Q?=)IeTOKFG z>cfE<#NJ%f39$};ulUz%<5jfmr*ARYKqszbG++9fTO zLEJjMq5LRfv?=gHolcbF$-l5YcG}+WS}4vxJ;Vucr<;K}z9xg*w($e!zdN5yf$CMt zGgXblf3g*Lv&qJZ`s3pc$rxFo$a}%3-7Z_H3}GuIea^&qFF)xn>v=Mc`0r=kYZUXR ztd4qIc}F&dm7EA?M+;M?e1aIs;J)RYD<2%Q#d3b+IO68a5ibF!rZRaTW}RU=OY)hM zx_oMQ#|bNGUDez`NenYmkWnlBPKg-tz)Q}7`FM;i##6Fz{b=*Tsw`f^oFf|{q`na2qN*Gywc5!-vv9m^d zR2zl-X{UhQ^BW-y(IBO$N1=5u3DsEWn6S(&fjt+3nB?pluT-(I{x}jUis(&3_Y*|V z7Jvz!YZ|7Z5V%;f+cr2)XPZZ8i#S0e$5N^mm105`(0)25b52jMJV{SPOK{J~uW?ss zsvWJ!1jc8~2o^IjyJ@&4Pok`Fv3erl6swPTCLEc#3zD@eJ7bC@8tL-Et%$n-wzxsghhKX}#}&msn+Te5MY&o}{#DPQLlv^wS%ZyYHS=@NLgqw` zpg1zLzvSbo-|8j?!I~%BiJ*eGp3V4h1w_ptr&zxtCVdF3t{3w;0r zQjRqB5Hu29dSo%sBW9d4Tn#y0e6GP2JBsITG!+{)6S!#5&uNm+jGes9Vw3}cwX?Be z?21AZfEsg|ZacprAD7U+YnW53!I_RC*2Rq0!O8hNp1BH`!&k6Ya?6aka#4fAqv8rCHPoIomtnEiBk;A*N$OF ztY?{@AA75@*90m0@Qab$VvQQn&zUrkbWVB`PD~1;MhMCgE?pZEQbpH?7n#5~GF>5p zF)J%;!-bUH!wL z(eOml2Xv0vC(lJIj66jLS*C5wW27B5wat+@GB>nvMx|n@G0QOw3`J;X*J|d0jR?j$ zAB9u(>~rbtxWEEph~6c*3FB6;HV8gan^oe0r%}UR<&b}7!NV~rm6x8pUQPa6 zW3oa&gnG40^>c7L9;0ZGuw4$vYA%dXRt?LBa;-Yu$W#>0kvm9 z^?pmUSZbvolz~F%7#LgLNVPE<`!13g^9KeDhD5Brx`sJOJBx}~SVSiCEb_LaW^}fl zb#BmBtSfhFiYP+3$R;PO!JQc_yZ;#prykV;g?~@-<%ua%uv1gU( z<_t?2IzLl6VFs(og}|vZDj`YOUu#&F^{f(8usGYaO$r2Inyw&y?@gw8&JH1Vpj}ik z55kON0AZT+8?_cRoj+V7foHt9|vp%8`M`s`L` z=%g{1ds~({7KK25$DkGM%8YU*O_;2$O+jqKS@ZpP5?XlyCG(tuti)_&fw8tB0f&81 z?z!Z8-+z2EcE=`P#E3BFf&>W>k{XF+v5(hnAQDEQ4GB9xOt_7HL_+Hy05Ww2p2C@5wpDUu@)vU^i~Bs0*9~mu>ZBT=6f60Po_IBuiS!_{k7&~L@5sRdC!de z=&EO$4Uv(X(YIFu#SY$b)Lh3hX9Me%v7E%jW=J>@#AXtmF=a+~H?NRftuzrwM)y$) z!xq#)*VkY$+zY45O;ODDaJBF=ofXfTv}F;G$w&iMW1*d|b1ra~63^%|(H$2%3v1Kl z2XG9u#Rg72?q?XQBq$j>Wr9bTmKh=ilf{16nzLX){OLIhDYasRbx$#rlh8c_YuRVc 
z%nI&XY;h1t5ecAVBzxRUp@s6+r5PuKqB6wf(I=yvqfpuT_o5YYGI?(e-L%=`gKIrS z#ZF0RJjBRN3>^TalNYKUR;Zs~i-Ro^AuX-L8q!j<(;&=sn*>m9oGwBy zO!jkMU**d!q6I%IT)(i<@XJD2JBC27$OFRl7nj7q<4rSM^eoH~^5we*Hr9$V61o^k z8U&k&ktTs!G9$m35(D4CtF*{T&?(9=qkJ^;`-cQP# z;Kgt@F)d@tph9s7YeXMrLo`>8?N|EHC&>b`Ru8qE>r%Drx@7CZL0xCBS(pY ziwQ~5=8V&5>tTRxWneE~B@wikQ%;SD4EolCPe)-ujEsfpTX=qcw4b4wzm_G4l*CXH zkJieeLXy{U*%cG~#^Le;qFcmBBz&B(&P&*9Da=2GZRT2?0PeYZFAIT2f9`7#qbQ;R zO`~*$4DCuy&>%2~F^_BvW(?9YldXqikVtkmlxz||zw3HdA&sKWqOg%oww}El3s~DS zFO%HO4>!5#isizlKzvMdSiyJ_!Vj#dmM_A2wzBrtv>Ce=;IVPBQ{FGD3?-{U3z)Luf%F4on zR9M6~3axa1IgyML{%5I*K}FQ0J|o0XnjmeYypCL5^+c}j>?55ZF(`b*gBjeoi;?$@ z#*KFHZ&r%R&lK~E>6scmC?WwsE&eeGj(Ln0qbn*6LKX~AyKBq=@ZiOy|A@)yWVg%h)oE79 zLN1gBHppszvsNDAABH!4Qs*9>D7o0IS^iiGj`>8WHi%|I$QG$(O_hs6j5Offqgimw zzLPm~o@qJx*m9^6;1HZ}qbCqR{t5~f{V7`t)WTbAC>HS_gj*f1@mao(@lA|FStEvW zs`;mWP`$T>l{F%AOkB76a0EaLc|A#VKUefL1wD~WW_J3eod_zQh{xiMdPW5$M@H~& z3^2qq`(i?kHUgWTg4@sTVa1X47cYP-7Mq&@Z!17oBtN^E2x2OTEr4Up#5=^qX;Aa0 zHu=E2nygweDrolo!5@SuKwN5tK($WdrJBj2_>v{NfjxE@1V+cA)ZxBKO)3JC7N)bp zMub$T+YK3=0e}YNWO9o0Qv`)U2=q7|@ZZ!RFpPi`zUz-PU%#|r)z8Jn;(P--$4bsc8P^{dB>D(YVRlmh<;x~Z1sTHS zXa{vWu`(n2WW6&depLfP4DG|!4PMLjtiTDaLg!{ zhfAh;=h7yp_&%9X4JB=mTGnTbpzNIPNIE*HKz^1LWibE(0~h=;ARr>d1Ni9fTp>TM z4zJC^qHaJ29U~Ar)R<9rg}UR3FENU)tLkdW@*lnI>oNP8~|k&FcEmQdFV;xMK;5JV((vUW+j7?x`nu!kuHg2_5; zj&*>3du&$XkjNGnCxjo;0x-3%c12WuqHi$`yIDxE5fG^W2nS*bxY$is=JQN0y_`1m zz+$K8S(6%tF2dHAirwPUu zVN^@BoEgJ)i_IzsNN43E3)A^i1G3R|vuEal5b7k!^vJ5)1vtZnwOB^l=@VGg-7^P?NDkn=e_oHzdj4dqgQe2}rLj*g36-^XsAfrIYZ7T-^ z6Td7>ktmc{qMJR2DK3+9?*v|+s*7Y3murDkHp3QCKxSpJu#xZ{$7F@ftj+?SiYqha z)pPCK_jqVh6CuYmCk_C2K$u#)7-wG9|t z^<6*I{3aa8h8&sj(g6j1kWSZFr(Z1?Q3Sw)V!{%qGS%;jwlG~R&Vn2>xkKs&jZA3d zoC>~UG!|oZx$Cg50pljDxVrIJT?{4+S7^%49w#QAjK5cnha!aGh?8uT>o%4b;rwY& zC<_5rMOgb+L;R>YGYawvu34l>vJKcxJw2oXlHWLoXq?hWZ#KmZSVr%KjjZQHw{u^L z(JT1KQY$wnX3d^vgsCxuUn5ONEg}IET9`iAp=N^$Y6{XVGSn?MQTR``Lrgmdh`|%_ zQ03RAen?J5frc>_Q^JX@z&*cl!s(Ht6_y70=0(3KTu>NHSv8YTDRx3oWR{X|g! 
zik+G1soTL8dwM~QbH2H&S+D5H5S)CWkTa>#RQ}(UVLIz20+3{!Y2iFc)4et27uY?G1-|VJ6YMIa8}@= z(932(v>Tpcqb?|66EEPip)dqkIgf&z&cIQ2RB9KqRV^n6jn&N_HCcR{2wF(U23!k} z-Sdrn33zcrCI1%Ncxr#sB1U@#{D{qT94rp;8}S5GL0&FA?uYYWu5pP(0(^2w{OXZ?-aCG|`Jw5V z7DlonG4c9pO z758!2K#YNXBj#zAeF_N$XDB4qu0K?;QtRQPxA~}NM&s>DTC;@OxCjMi0%H}PojXFX zPWQ`hp`>p`VdHNm^B&mT{kgc8VrT^3(9ntQAu|JAOo35#_Hy=h~5n7A8S zIrG(8z{r=jW~dHo5VC*iEpIXX7yHHBeV(IJSntaEjQYs}XWvOdS ztw+YP3WcGGBuvG0=Y^Sbi~%Yv)9Y30cLzre$o5Hl+N-zLTHG36ih`SC1d~aSR8RO) zY}|f0p0&T(Q7OS>zl?+?5_qY`iJ=C*4DnP;H=j*j{^`y*bGyDb{rgV-%sJ(7^*!y+ z`R2*^Ucu!%`XQy}z7enNtoIKyGcEtYM%<>rD0?KT*QRjaiNp%~ANsDr-is}A_hlAw z3->Qe_l%lK{oc$^oatZEz1gPjbsL}E2Iw#4+zFr-sn~aa>qfcVMj2UAD7%#N0$c-C zqKSy~%Rd^?2?PA>7pO&4l;UhQt724x$6d-KRNl-3p42lWz%%^nfw@H(Q_kB-ob%_w zEbf_cQ7XfcK7Dfm%5-+NEc1=u)#$rOU5mHFK$fWo!njnFt>eqwCk8h!+0}HXE3FYx zPI-5v<*ETBw0^@rYsMTQmSWO7w)z?X-44HJJ!geTNvYiZ75WuSO$2j4L}K@1Ciz>I zKS)?vJhG=FP)@UtRgX>0sHY8#m}Cb-vA(oKbZ>5$-o(u@6*e?tkS0c-Ibe}XmbtTq zlBBLIiP$ai>96E;k*=Zw5(;_q`+gcJ?=)14O{!kfa()~cj$#i(7dB|R0;f&{pLpRt z!-a2mZuRL@pu{GN1?Aa^XGf$Y-u|rqiEwM?pm(*)&slm*Ose+6T??Bnh;*Oe9$j}= zF4_Vs7#cEC(c>%Cw_K|Y-P2l(Iz@YkPO{|ffo$z)^vBAcs)6CI8aavax6ekRjP3I( zbq9tGVwT#W6l7Yy_v4>5O1w=6DA}zH%$JBhSIM}FfVDmY21-$=p<&vd6(&f->Fooh zjwM*K50)zvnkZxE%svt^AoHrtrd2+Z6T{qZ#E7dV*G!B@O^O*Se||P8(0eA!M(9wK zbM}$VJ%15Nd|-t|xl1{IjEyYFp8?LU>`p(|HX$9}6LAkVd?lwDU}_LEOX)lDA6L#nNwY{yyOW{uoPIK-}`6*7fPIU#|&cwxG_SARI+a z~C3Tzo!JNho&>FvgW6x+sr4QA@wa^@wT_-U-mM`7zCaZ zwe{;)HFf`-Z3E7Tc?#?N_x#N%&3|SN!M7N(JH}$2c|<*58Me(#Yk-*RS(d-W7Qu_}=nw8dBi8GO)PTPMaAN#*r^6 ziv@x8tm0@Bj5n5|>-BSPFv26QHcel2?QRmCqMm~+^?H8N@Jflz%nL@3(_i>iQ+Sg< z`s-x*9nvC^$4_(kaQYV|b8GW~JjwEkD37#hK|ZMIeE*;GSnu*aa#I%maIOZ4a=xWj z`ei(eF$w1r`yg=DYj&*aj!nQcOuvy|?2o}CZ^^1iHv_MnM1RJT<<(_np~PR(5|8$3 zInoDL7v@>>=jbcc&wn<(RQ4s9;#59l5FzN68nPHUT+%79Cf?TC*kkWhMu0JeO%Ln` zFn)wmtz{XACt&N%X$2O?^0HER^=_%JCgli{-MCJDG!4&wB>9#_ugPs-#3;UhV@Mi& z+h(Pj)<^giPvE~!$)#j|QWE3H5w(o14Oy9A1V_7J0jGz=%n#=%&~Yb${r}nkSKjeBkAo)Vu>`aBl-1Y+vCA}K^*8{TM 
zXJjj->E%rk_wdR`-gOLkDf%XEeGl}V!b^7M9;%>OF=L+7h+9dN-`6?*GZR@V#0hb} zM)9G|Gw!!3tY11MuO;%yY?T$WJvfuAhZNcx0G+3ko7V_$R6_=V;r zvZx@h_mUzM?T$E750~8rlbn~_46N(!MKzwycS*izlYMpU%b-eUv&%A5E8ZD(dq2cd zQA~2xKX#u8iGpcht5M$3?Dv^prN}u8K6diusN^_o<=-)TvV&w}e>3<366&%r^Uf9rr>A29~(34quKTYVu^~p$H zT+aIM$ZWa<+w=Z8B%O8R0uL|6c9Xq9)aeUS+skkpkOOU|%lo@Wj#?7`~h)*p%U}vc7kl?-JDjG>0;;%1u zuEa_IV)@56>>2JvThQ=WOGyV3 z7d+hjvOf+({jNuehABxm&BZncL!JBgkFp*;foM~K?+t)r9}_lBMjBuJ$H3~MR6zsa zEz5VpM3@)ZzeJ4(y9~i}uNWEI1C7DHs1e6rg4`jiy;mjaSX$4-z1}gAYQY2BIEowEiGoVe;(x_Rh0W`aEm{`0=9U6kVVN_2&FEBFhR89y&S^^5FF9A?Sn zA>XCzRC-{jInYh{l|IPmG}g|{^FN(%rjX7jAD^qRbtmyFanU~F>wYCbFpp#Y$$S&e zH;!yuZ|hEUc;=KAo*EsmL&CixPeU#Gv%6j9Yn9vRdyU$+lu5&eRap51w7*Hy+MV1- zCn@)GWU(nG`Z{+8^-B(IWsU^bXdMLAl-%oCm>+QwC0^g>w1dG4Qsn$YR&$ukw0%Ek zZMjglUTBtDh8#`)d8M2u|Kgbs<9z6-z|2=e^Wju{F#*%4g8vamkFgtQs+Iyio@gPs6DPoP5Vq$ z?EW$Rh<#Q2rc2_LlQ#j+nsYhlXZ&qL?Vb~wNFskxz;#utYUZJRn>Aq_{{zQ1pZ_3( zie6Y*wd+mE0`>I$Xc@MIQuSk|bB@D}i0{ZGrE!@cruT920*&I83uZE3e<+A^W3vCNnV?56jv$ z+0&Gtnug!u^u1Q!Ka`r$aGhvqf?BvUfCXZ@V@~18p-j*Co@64)~&cDyU=QVy$`pA4KQnk0Udpj!gkX(aUD@nQ%GVV;Wqgt}Jq&TWEh-;6kG=R* zeh=QISs54@`c>flPRBehU@!c03b82I)&V~mxZBlCS`%1uK@IAer$LR!0i7%CJD+}X zk>I0~8~wvufVS$~82`YfRkXW@H6(tTL-$6pgq~(hxc>I9x{hqWx6?mFd9^k!S!y^N z>|7Xp6)pW9ZlYB5Ja*|<6=Dqstmsn4Em^o#lh-Un;=htu58XHE_|pSgg1i?yk$Y7E z6jKaTfiBI~d;G|AsDQRb59ebvbk!fzEY>lJ>p|}wyO+GWrlRnS)=%Qv8^gz4b9bihv*4@E%Sa*mgR;XlTtv+3s0B3ABa{roBpfD+lofa ze_p1Zexjr#1uoaWASK=QsC(ppjXG*`^J>P0mi4`{dJ*{0mhx`xJ9&0>#F~zzZ!a3h zLt^gkC$(1WcPu-87{%x4v(Pi1$p;N{CgGGZ46u)!s1kT+fXW8s_Dg}Sl2|vI^{0IJ z@!tetoMAWNBs0+Q${3@t=^ePOc!cV`s^-AqtU0)3#x3RPWTFz(J*^54>M08D zZeeTm>d0z-%oZ@e|JMC*r0%wV;uol<%-f+J$zT2mvQNQ1c-q%8ijU8K^uhj|^d89s z?G`WJP3}8(G;(KakLb78FDF>MvvU^x4>_x}pYw6-azHwjqk+%c7(w9un8L}hV*7a4 zY#g*(N-U9$@I7Yk>9rF2FK4qeG8rrTP%(OOGg`j3tUfu(?wI#@lTG>Eu&Fa2e$l*> zlg8gLNPr|k^G%jhl-$m}4F^v}@SKmG^se@`jga%Kb(bZOJ3Gc>o&~;9ts3G>sed1!6E$dYsH89q5b#XPu3-0FM-=h zx}Wg!jT8$kD!vd-Nfz{rZ~u0bs}I(XOs9D4J*@z~?>id3u6olpvRCAMR>4@MwD6+6 
zTP}`O(0srMKhplf^{l_Guq96?MHv6pUiuRwi6cFiPkrawct^=Zjq)a)3;#1reHh;j~8#gT!z#d`e4>uWYi zW)r{Q8%T~f@jVkYG@9X(2d>^xuQKxc4j*d){p)C>=NRes%I>!{*0>Mj+1Z`Zl+E8t zt6I}%UG^;te`tB7V};+nzvr93ur$kgO2c(!p36Fu6m+%ApL?3GD1Z3&z?p-(?yn!s z0{c^^6tX{WxHYF0Dbu1S%e1Fb*E7btQj7Wo*%BRt;C=UvIznQYipiRmE&88)ojJB^ zoMPCb7woYA&3KF41|l{qz?bPAxO}E<_~;3(p4d~IJzppWT)IguFi~ig@%5BI@LhiN#PIZg4Ex2WTcjM!kFfUeG`jubS z;?v$a5xaPPOCRoZ$a{5n0|n*#En7>@6vVm7YDET!z$bs&iC|4D{`yK9GWP4lkVlE$ znk+5&?^>xppIMyr|750@mbqPGK)EJ9nDz87eC(Yqv8wTZFRzu)F+4PJH?JuZ)$QlB zKt2(ZvGEDc{?6L_6hAT~@8udG?826Rg;ZtR{u~JB(-=36MX#K_vf_eId=E~owj31J z;&rNN?~t%4A#!}+n2_y_%e&;aTQZI)S)1A@rql^I{=N0-H^xdwQL9^2=Y^kcXl&#E4}%AeK1*b5F;~px4s00o0{3XH0hm z5X_VB=g zLBXHn)M4{_&U7ZwEP>E}Y8_8qt&kbd3oyna;E9;a*c`kXHZAVz$p8oE=*H%bj3OkM zetb0$t7$qE2cVOtU|Ar})>#_Ism5H^(5`XiaK8D5$ToWHf`dg8eOSIOS54Hbu;z@o zev2P2D1wCXtqL47aFN3A6i0GkMRfD&>YX;2AKcq1pcJ(yDM;UV@$HNF{Dz`c3o_|x zaT7Uc;&}jFezVCsm1pFD-EO+iI6wMefa~tgX^&xKmDC|s(K<%7E5r)LGkp^t6N!j6k&*Hq(A7w>qYCi}m z;>^A9ao9IoGw_~d8wXdcRE=i{oEEw5rpR9|ct;DBrwz^?ekCblhK5Ee_kj4lekk#0 z`L=M69AGgrrt6}W^Z!zI2vN+xYI6XgxFecoY1H|V*BtDRlAVI+7z)kN@*Z#^#}b!u zgJ^j6ncD{|Jf_s?f^T?{0&iDhBrYgIMyUvL2aG5fFW_Vc?Kl&iwtC(agr@vvnWxOt z%;-fH%$`#l7X0#d8Nj^@61k-@`x*9-UtS?+wo%BY8!_i1G}L5C#G5SGQ=pZ@JgN#9 z;MT%w<^mo-9JH_*oY+(m@p*F6&4LZ;oTfvO(zp=Z$T;cz z{q-ylV%w)fo_EZ(?hGSZtr(_uiGq8v3eF`eYSQ-QP@NVhUkR1n3;?U;s(1~_)Hqpz z5bb=_)QayK`2T3HdQuGVGio0lSkiBnrK<7@u%LoiuFh4?l}?dCJtnq%EyBou{MB}w zX~+5MFEb^O8qH?bmgU`EAZD$Z6f^EURCF1|MJT2Du%n*A2LK{E5eWKjG$lpVm#;7z z6ufU$^qC}OtT7fG+jUN`iMq0v0$>ui=U=`;4@21wHLlkP2E(--(!~ zoms{gwvwT%LBhQcBe_d74G=%vHy420X4t`!1VOlr374f3!Gz!vK0ub`Hx3mA>X{S1 zx=tgH($MPJd7rwK4J`%!Z>bg|Pv@)*` zSa9R6O+irKyG}ciwCKR{a@{kPa90(kXBYD)BQ(oQy~T2JSBV?up#HWUsgZeV-82Ds zEyIPSpsXrgXyUxO6cUTJwVJx_e2I^)&`+VZiUt$a*_DkgMDG4$1MQoe(yrfA$_3VZ zx)a1mDxKZEKG&P47PQy?2adBt*GUR4VW&R{`3Rf^w4FOR!+UIa_cc0^`eyrVB=Liy zu~m5N;|U}aVsn)MG<0Y}0KtR#fNPsTdBgbV1GGn!U)&$gO6gLhLTJCw$DQLFqu4ER55@+b>@ERGb=wYC*SaCC7+3bgt+eb|Zc$R|h-vqC%)EL;!;- 
zUaya`dGY$v(N6V$W;^_a20jjBcrGWSxfuK6%m|0H42D=wK%0mPqo2W|5Gn*WkSVUH z(e7KRy)kEVuKwO&X$j*gOS5r>+v%C!&lVl*#pPWFJG)K<5sw%NFUv6>_8DtoDtmO| zY%S0s6R}K-IdqFjgvGYa0KsgJ4nU2>msZXb)Iwmn#Z{arfxA6;`xA*YL6@S6CAJ`5 z$p!@?EY#Yn-&KGmpP5H|Znyz6A-vM$*r8bfpes@WwLgQxIwB!0glJtA+buuoj7>Vo z2^5;zy1CdqzSowZKqHIFHszWjrpy*u@(8iA^(YB+5YdU0p<@=eS&%tFerS!sd*Y=O z050Exoog^|3mfHKId1}E?!upBcVWi|hTjxK?%CZ0K-)D76dfy3&c7fIcaO!$yl5?L z72Ty+nZaIEh^wVLGWO`*Qq@rSU{{#`>}6&;X)+DYiT$Fud>0yds-{3szqkT zxAsZFZ91q*P4=BkRxzj!HAd7>u89=4+;bo$0ssoaN`$_9w?j)7#$> z=^8pL<7ZwQ_y+*pA7bED^`gLpMK2Z%`d+%iau8#7di>Tgn5X^bM#@LA7;1p$G|Z3h>>I;#+ITvNcuSND>2wccqSQVNF(FR=!GD>CL%Ru4hj z<3)ePSrDA!slZJQ8EV?5d@S&tSqh1#C^0&LmOXT33Rh^|B&tdqHpewT`_;3)!R96tkM^fv|LN6{q3MrrQm9fS)U5u8tE{#NKD4 zx^D?r7zG&1L}v5*)k7C}&k#B_)uzyHBu>T)oe<#~h1j758+2uYdIGN`$i=AxLU@3K zXy%F;8BnY@C63+I5lYnC5y#NkGEWC+jZ0v$PaH@;u?)AP@QG9%sp>M$6*R8WTzbMt zlJeB10J-WcX%WlGBWqdm<3cd}pXF55Tf8N;w~pX@5~0Kp9fXF5FfzfxS3hZB-0?O{ z8^p>!H{DH_viUg}Fa*Z)1mNv66LmF zm^mhNLvCPDGi>c%zJ7nGcKnh_I~-xSh}D z3_JdCNg52R2m}?Nc)8-#jWBr2e*N85zRzoAEc(Fw>{1RQ@fRHR>}oivXSpKK|4gr* z2?~SktyENehk*yN6o$4;6-_@lv$!bX8dr-d(DXU2`=)}aallr^F+;Y{Z-?p1qTCRr zl-ntmpybn{BxO&(1yoeFjY2GW(z2l(no+>Xt2`~g$CN+TW%R6n%|*{(w*C1Bt5(rZ zQ$A0qbw*w}V*xeCgh18J$7(FC7Z56QpdU1hE;PVDiFN@aP7??OBsZQCB7Y15iS}uI zsiK;8t7byG(WCww8U*}esm*TBbxS0ML2{*Dh~5vZE*|Iq5+oW4T$2$Q&Ab@Z-{%`1 zN{y*bmXZd1l|cchLitzYDW>@oby%n-K>crwn0TZ|>(AC$=NsV#5)7^JX&Wp`)k##X!m71H!> z0U)$9x2`8xjM?`_!beEKsJm)F?;=gy4zu+w3<48$cuAZ;M-sFVd{N%3)PHgm^s}Iq zQcA-l{|R@q7_;T1A3)Q~FV)GxO(5Es%w4S_uLw$h=CS;?g0{FUrv~P$(8&U&f!nax zc*kmAh1quth^-AeA@yd2*>`Uo)y0SA7x?HL64M06{gYZph0BO=2EmtNtycE-lY&f#ss9NV7>o}j$cEqhAo2KDBTdX8 zOoK9An=vTJ!MSvLc?sek5Xv?_HG}^JVxvW5qpE%SJO)2(e9XP`pOR_DX2PC;dz*~I zN2reTXA;IRb&yxsatCHTy0@!!HK$sQ&!J=wU}`pMV5$+CEKC7Jz6j$v9TV&>v;YFo zc)P+eMpiZBKNMfBpF|_6d9g{XUoNBM^uoFk1-dM=_Y?;fb30Qj&AR(4jb!u8}4enn2q_g6qw{ z9D@VYoD~^IM#c|os4)Cpo*l_DtARz$W_j_@(p4mfXW|AlJULg6h(P{^{C2hd zi~kf2{c_V2LT<8F%|LzrCa^aTLbNI?3K__wr5ImMkyB@Bja3+m3mrz=Ny00rKeTQ3 
z`Dc<8E<0jnSscTnunYWe}Dcx4xnfXL<$yoM3fCgV!zDrP(qv4qtMm zcn+5UwXK4diR+4(*I%aMGNOIVKLQ$?kK}Pd&Kz-5tZ{HK{RdOfyD@WRDfEC8<6tVU zp8bv7qDJKuF&;l~V1k|sim1Ru^uB43U0n8}F#x70I+V_JVte8g-?_+6o~YEP@f{^n z9$k(qFjg0}e|H2BN_$m`DPo&V`O8b=(VnT>w5dJnXrWFSM8$tE3UO|rjRi0GalI@{ z{^(O7Pi0d+U$6$JD%Yhjx0{pTI5b!kKQgTw&DGAb9}+JvNUsj4N4Vr7{tvC23iO!u2jNYCpw=HX4bR04Hl z5e0;*OGP|eszwI5KJ}y`56->`>rA+ONp(LV4pLK@v7qG>EIp4l!5Fr%=tZ^iiRk&( zed0(WL@E;ISJe!c02YWr$9=M%kjT{#MePX1N<9;|S0&aI0-^@I7eZMr%;*n#Kb69=J#bPhoO+Ybsnvu~`OTNT<^x?yK2Z0zh20#&1i4pI(} zfmK@}BV)Xy=8*jty#y|9+uDkODX{h-SJ43rHF-^k$>j_Bm`A1WUxgGV(zZ6N>;m^f z3GiSAyjc@IlEwywY^Cu7;Ac1Arfv?A1ZZdVPoG3aoVT{i(}-!w;S%vgSJn}=O9&`f zUb%Ay`RDBcjEbVaU&1899)mPDSkv*Hej~n)oOm9c>mEisbk0S%KTE?Pjv{UEdX<|? zn+)Cd&;SA0r{JWrg&q@8!G+6^=hbm;3(J9&fz^zr{gU|{Srl|23|x1DJ+q3TZvJXA zKnfw7WY-)v!S$@|z;%vk2O^QkhD{j*A!S28Cl?8R>3hd>|(vsDa0 zpsX*2kzTXksI8b*OB_^v6=Xvk$^=a5JvzD9N&2ov#5>__(dX!BNidYED>M{iQNW(I z-g>vi@?8SNa{XX(<`IZrBky3`a0f&zV1uIe!ReGIt3Eiu(iMjx=LuBWh)pF8>U!Ft zu_dgy@QtV?LK=&trkJbYvL{a`^--stuW+GIrRNhVKASM4CldYBu~f2bNE8?CC>zF3 zh-glRjr35k5W+>@RpPQGWV~>rJx~=NnZ+*mzJ6JcBDJF8MEnm3882dl*`;Q0#Q{i+ z>kg#57G_xhY#`P#Gq$Vw7cnZ+u^#dYkLa{WGK}6oye~wR3p__#RQU!}p z6`hVTwa#7yKHUz$SVv|kWbJaFd!pk}49QZABI}&BlQgjqNBm#VU=)9^b0Zsj_A^Wh z@cdnsE(|@pr+=}hOmep=EyjasM8HBu^Ym#>tblV3;()znBK4+9V9CfZI-=aLr}tg0_U!v~;25-hmHWgs0Eu1G(<2!j(D@>A^K z45;VY>6yZPja#J{2#GwUW0x0#?1N-t)7CgpN!2a0&k{vZbi0lkJ_X|kVy$RsVZnBvW z9-MYg4u`*b^V~vxMtruvST^+3*0Qa0$yCIwy#o8|q+$0)*xAp%=4$Sx66~vsLe9Ie zbM!!xo-VW<=(jn|olhzUB?~JLYM3%0C{(#9} zN+O^yylZU_01^%^sv4*0kJO6RQEFY=;bGC7oYF;~Op!qD;6O2Fj4U3D_}WIbX8aRi zA_D@ugn5aPEmf0wnt?gp@N17QvB61=7L|lK5X2*nXdMeTYJrrpn1S1gJXTR2d(a%G zJfXOtXC*!Bc0t_rjcN}yAVu;3>6mD~3M{1*k*&j;QjktoS+*(ab}N@slZIlr}b zlS|qet#l~Uu-uFyK6T89&iL9w&`Ugh7~&IKt+U9iUqsf|>Fai|rRbxQMiX5ETC!F(O(uZpnu9gq6rz8)T<2&KaI18aB! 
zDq?{41C@2cKBxxHA{#rNlGVpz4*XzSaU`N0(9{RWJoHbInVAOH`gzd64C2&ATiR3ORxeoiC^1X+6%juhK ztk`|%cW)_a@j}97;d&1OlQlx7;Xk2d2*s3=^r%}ywkVJDhWrC?IvcB7h2>@p+PC=# zi$;-vsn|wpv)NNc!oJE%!R>7V#->$8U~-~LjfvST&1)a{(ZU}DJC!4DR9X5~3B4mc zAd>=<8&{dD#-}X6P>MJO4&BEKu8@zm^pqcpPWoHl6CAy+pp9KpnOD= zyJ<2itqBLUF5^5K>9<_9m`U6{KYQSU)7rwT(9j+=$5IYbhlI#?4SmWDXEajZii z-0VHC@9WfD8(tnz4YHKFyUvfsL@T>|d606Uo_>v*QnUTCK8`2yt|eF3mouMkL#U3S zgzaUqKs}M5u!u*K5pCNh+ZWuMQV8R2d+!MP0G7X zlXQ_|A8$`Fwzc!r`+FY}2QU%R00xMCu9CD1376^GP0}=^;GA;ht_xGla~?&1>0R1s zd*Gg1&Kd*Xcvx|2up6FqOK)w;^q4A~1hfbkn9V*tABq5-rK&SgKZ~GNEmh(eRj`}~ zo;X-&05CoB=?!?1i8U#f{Uk$9Wk*4~L|B=Ndk-Jz4uUD+nV(gRClW>VDO#C~$JjoY zD|~WfZ|mUXM2Cg(gpOLpZB3DP1ODSs5MZZ-rXlG6g$b!Y4@34i4GbcM^vA zWUsJn!Ve0Xix4){mUOnb{%`=30#eWHp1WYvn)(K6v{_tWXRpI~inf!eO}+w)yaFos zxfoCjg?+?JMLq~`(_wC&!LRwYEBe(Z8wO*dScx>;Ih#z27{m!gyYqW?zS%-SvICMM ztW35ySgk_bIiYxVj14e@>VZrzAl^pl7E{jvPE39jakWY1*Z6UKE;2zM;Lku;ps1LS zM6d@Z6)vPcNut$%9?uLp4<3cKc5B7{CJoAoUc^p2MnFbAM>YgL|Nesjl0PFZm3kzyJnt(X#ysTryH642CW%vf*`Ex5io)(y?4c(Sw4rlC(^5?3-#N|9q!Dh(4|PGANq zK)p>2(-gw|&$ot_>s#8YMzjqC4TLA%9lyuUJj(dYgbAi_as*PukEw{0?GkwjGPQgl z3j_OLagqnm^+~c7jEn(p`FmKZA7l!gTqdZvE_D?ZMXY-d>TG)}k8Q`&Lf{5x>s5c_ zj<8M3+++R;_pJCgA^zF*Ll?vde0O}X{c)Mv@qP?G&k?u5i0RA433)Amx(~jOJ0+eN z6P6W~3KVR~^>N9nZUAedQ2cM+w25mt*UeH-JFMyUBtPCFYl?Wl&pDhHIOY7GbIP{}`zb-`AQ2 zS63|5FAr#KnF-KQSn{`T;EmAX4$}@!71A~4A1AS7>7(c?xVQw!rfG`+)K!_YBGA_O zPc|fQ3y|Ea>34&i`?d?D7v084hAlsmoGBL?2VZd5eRGdh%7zjYU@|Nv8%k@~Fiafe zmGnURFp-D;3V6uUJvL~*Qd}~0RqcJkz8ZH7x96~(87$nTE8mVHC|D>DW|k++=Q*+P z!#i6S!)4EWg=k@~Qz30LxJDBoFVL2oE*xO;z|Tco3X5EepE(r*jOhg{MA=d63S z95NJQ3XqAuSw8bY!z9$oWg&@+F+q>`TL#>fH6XG}1v*5URY2N_kBv z`X4}Nc5whfM<@MBn~k`fH{Dz1HQz!NPn`-g0*5ucR%#LJsR4s83? 
zzsU3)qr(e~`+B1$K{*$-NO(<{!y6qS;)H5G7D8^+%{~qndC!s4G7AiKwuK2ZhpL;s zM#=^aO)B^VrH@I(do>=6Xl_K&zM3KZ1o2k|p@m^Mo>%y1ucj3gXZc$&wf=&HCEq2G zZrevyuopK#Lqs~5bq>1l#z_^SEoxo2AgqCM7m67|ON@q1BjnVN@6Cx{x-ZY8;7rop z@c?H=t0hg#4*&MwJa$k(qoQxEaXhk*{9c(#5H4xRdAy>N%nrdt9>f&%Lk%yrw)mzx z$6$0r6t(a$LwXfrGPyu+FhCjw;(BpW6_jI{ojKEKnMbrz*#b}sN_5PycvLr$xRsL; z53n*v$s@LBH;z5{O1-ES zl%8~fY~B;I^dzR{ONE!9zhn^4G|k>ctxW(4M8K6T4x}F{HUH^{HVyp>&YDq_(}h|a zu|4)u{2C$iZSpWujn|AB#Jz4Uo~j8{!G$c0?9hBL*la5>iIGQ_n45s{L49BL01wNW zP;0`%wEY=2rkKCgJ)?4E#hciqD~&Vxy1!#@CR)dyMorLm=^PVz10vHfDh-Sa0aJUW zd@rv1_%0D?g)?$k!go?TJpRBqzN?wLL+XQJ$|O);NY*C`Bqg7*#}R4QPnv|tO0A3r ztr?#u&=xebjv&5&A&8M2U(zs)pdn3MGzQ|XN$}`lYDo0jz(^LWP)E`pr)!WW9CV+&t8p-7fyNae)pLI3u9&$|5L1<3p=tj zQ;=L&hI9(_b8PFp1=jW~r@Fg|3}3gdN7#of=(?_fohYw&liaM_plE#58AC!Zfy9JDOC}V zgcP%BlSCGDCc)N7+c{aW!*KmFTBiXF)v(g3Z(oM-5Ki~nP#wlry3S!#oOt?#W%2I= zA|Y`FNCsGj4DtX0gOHKeZyD=HQ=Qez3D#PJEK z#!RJz)>DR|tab@+e~Q3!9%2V)5l}a9n8j?;OzMyTTwXd1z*8IhYCRs9RAYjWsF?Vm z7|8pqlfMU!-^eqK)~(!!1><61^h1TtxAC}mfPKcj^Gnwav?A)kpQ(zptC%EDC|QFL zK65p4NP!{-HnX7+0AXE@FOVKKB)6rHGLCm)YGxn+UPJyESk&D>g(i59SN!T@IJ>N3 zA!KsMV0!`zI4&46M_IZBL<+k6cy{+=0^1_gJ7$Oq3eXN+>8LZQl?6lCEJFzX58Q5x z#x59mW$t($L714(R*|blCXWJPmgS)B^)0u_?GVE1roImuKc0Eu3c(fD_amq6h-^!+ z@APTmEmTXRY(I|T^hEcHuk>%?*8cbJF@q#O%frzi1EuDW8T53k@R|uW_P?k(1rnk$ z7dtB6uS(#ruU%ja%qhC*Mp`MeZIy4LQ3Hz}MsH7yc*q@uvS6?Gagf?erkVw?KFjG& z1-sUy^G&_jC8M|S`?LO5tw&aHxBE27McE=52AM~J^=NUPg(oX4;iJfLlXhQLZR?)B zW#@Sl(x-l=No=N)=`GtbEa3D z)c#BP7o;);oOHQ(F7c|$YS^`tXe$WHK>r82J^R+ig#Dkf-SxMJG@Y~4xb&q;;v+p> z!eoX#eMG6^^iaGLQ964a%Tp4$G0(}{$MN1FYBZgSu{UTZO!~fog|aYI>PO=mLy%Y}I>V`gbd5kGrF07*@R3{ss z0=bZ+08DyZaCo0r0uCXN%5A@t9xQ>`9r)XM9$OyIZEY|0)T^(+W}V~BIgCpYO*zLz z^-^M=y-z}mfO|)Tmz=ylZU3yXH|wjyCRWxnVImHo#cD#qg5it&FReVdIg<5|5N&~0 z=(sR)-@~RN8Nn-vSxvQ#-X6HhnZU0R(G2Wi2a1y*AOt1Yd{@>kH;L%J0TweDwt;Dy z837!a-h{Ik@cxA6S|V;ie@3HXpC%~H0(qu55<7n#;Xbw4=Mo|$vw*96K<+cWlM8M! 
zqD5lCF}xCk5mQqQ(!Yl+4HlB6$A5nccJkZT>M`768!L=GWHnS z&&p142Lw%(#&4mgAP`hl^{>%ucP0pMyrgi?M;B36&XL*asRQVkoRc5~{2NfF{L8I` z;5XghYB4y?*stu?w2;TPQdL1+QtfLBfq?3r2dNT=Zvk#<^hpicOSFCw`A!J1=d|)G zvknXsthq0Yt$5=wO`Jz#GLiD5dIgHq11l#EVd%3g6uT^ytPRfxBRJTJ4HkPHc*r`U zF5DqS7~8-M;jzu#4@xqQ96&BAkVmnn(U!&#ejZyJ3FJyqVcR2uTE*8%i$kt1N%kt@ zsdrRJZ0FJy`GLhCj>42Iel0-|J2h?PrEZA-WtB2^6aUI;{EKhTjsIux&qOX5^5o8e zVFGbulbmn{y5k|Ml7N+(sbZ*r0uQ5gaDTcutVxhf?$Sh+HEtM535%LGGxCYiDnD4} z9>@lZqJBq-A61wh>9vkOPwvZC&M0}8nROh3|)TE)Rf z>B?IHzmThaspjyOjX5oS%6A|MGgyiytt}C8v+pP2$<*3Tg6hLV_Temv)DXYGkEqtq zB!Z6Mj!hUFwrW8oz=K=7^qbYV?6hHB4XQn_)v10>6WdP+s{HzzCsHM_mnNRCDi;#P z;%{ers`&a9gRA(>qxoXvKPGMjRjmac-}jHkOrA^AElq`MJu3u2#}0*q$q#|(9nk%8 zgWWcje1r@+=O#DmN>n@i1_&>Q@5@Z4U~F3PGPYeP{lExz+;m?KGi8JK5&s^ARxA7D ziMGct${3(h;A~T^fRwkn-$yLw7z%Wl$p#fnKLNuGPdGtBJY=)75-uspV8Y}14apy@ zv{!fEQQ>Al;P)x914W}X$e(9uesc3xZ#<6ApJUTGy5Wd_GEh+y__$*9T$V09w1aF_ z$JQ-Ouih`RMAPnL2H)WXW*Z5%ZLm5;ulGFz-78r?T|uUOlmz2~@^_K;3-qD!)RF=+;4EZr{v$n)*kmZg1~ z-lSNtwtdCicIgebC6m(H7-m(7Ew`rn(q0F%^9R%>4ujlh7DndXfGXbq$xl~tB6(UN zr03iYBrcI3tc6~%`i70<_c`y%7HpR|b{%ya>j7 zMR7h^nC*gV%!qa2J1~AuI!JKYjAPfi^twUwqT^&S|E$sDa(`)P7WD(&#IzgaS<+#b z07Q0tMW|8E^|TeDRuzR+NKoNCW>Xr~3HzwJT*PqmjmBM!ZS%?jgBbQD1&Qu7LgZ(( zjiPKsaetm6D^uTo3E6qP{F_F zPF2(cgo9!t_~Nj+``q#7jzp|QYIip*)JW7X*mqQT+PhMHW_U`&8j!oc?hVI+0c3DE ze~K@-Nva5Ur@`Amsglgi8p|9(rVN6)3X;eHx)UR3QzvSbmR<|R&N(o0XVTO@8X$yP zo*sq1ocqUG6gL!&=fe+pK^Cfpzq9= zK6sh$M-S(sD9$C$OXHb#KKL2^#dWJ;{y#d*r*qs;&h~qGofjpi;82n(a3=pJE@shz z?C=5>h32D$-Z12gcAgsZPw!Y&&tD&L7Hr~!YhVd^AdOT>_6YE<)HJ2u!O!`|j4stN zp^-BIP+RBw;4!R`*Yw|taq{yox`A-YGl&O&9ZQW#w-w0yZ(b=3lI#EY2Z zH}-i2tGo&Uw393J-fO{2cat#;iZs*<)Q*cMF5AbY0Z~-R)wS|!X6^-1zEVax-lUL^ z=Enbh<-?=$Ax{qynJZSR$_Hj0ZsI+dNvQ&uWGKfjC=f2@b_hjDHAEoE3<+=5ON@}l zld0k5#6&kUj)8aXf^jMcqsm63j1lTB>u}d8&+#24XHb3?5s>awISgm_96_VM;n~D!AzNGc8Glc{431PU`RMUD z)O-QP9OI{Q=#oo&g-OY5Ldj)NImvHK8Qv9UwTo(T1`7Ns-c1l#dWxw>!3@;!utz?T zW=RxVE1?*%HoPd}1$i+Mx#B>Q!W%-At?--ZYPF0n?}@2&utNi3gx6vqa0yk~q6)pO 
zG?aV&K-qNfQ->tt`(d!~|7tyI&|!Qaxz=d&hAA!_8c|WmL2D3jZoR(Ki3ul@iY!ru zjCYlfd5AVi83GmGK%cSV2b8E2AH~N=8ZD{zjw9}lpVlS5JpW@BARAJ%9-@S&C<47X z3iy)vX)=ON9lqhiH~jy#h&FlV zD^iCwuHBC3P~>Y>Pla(((3p`Tl;0q3HcqXFWm#}QnYDO>sNbX!*(!+6Od*>B22|a% zd#znj0*H7DAADK$cMBGYuvnn3LXhjG$S0{-oS_A2;Une9eyTJ!Lf?)G^8y^2e4{l* zjfos9sbKIWX0xR-ZC36iP^zK041a4XlpPz-++rUv&D{Yc`>25*nE1Li4A`-%sC0%- z_X1Em+=M;v(iov14*QS>+Cacqa1EcxYVCi#e)tU(j!Xv)V)E__%qIglIPKLHFi8ec zSHu%<;Pp{?aO{uG*TFms1Q(>I7t+DeC~}Pjh5i{W5Kh;tHsFLoE8k0@rc!Y@(BvH{ z^?ire%44Z;h`4u_ZLT%x$EX#`hsrRJrHnXEXsv(fO#ztwGB`nM z{e1V*;Wi~1v?F!>h#Qn1*qN~J56i{w$7FA(wl!quVCx0K@I;1TGYvZl*>xrblbzy1 zanhr|{40gu5&gB7)9U@E#;2pEy}ifnv+<<=nk?o; zn;Bj+Gm71nqJ`!E{Dvqp2_-wlC~m(h`++9-J=$j>G*`0xPon7kxEtoYW=)`craws8>asB_&R=Xzv znqvyj)DY;&IsVP)=+$4(b6PL+@6hlIK0lJpfWP%LZ7(w>9c@*fiCI+6PElg+3cZ55 za)pvVC?vprzDn0KI)`s{QvKe}rbEmEJKCSxgJ0CQ##harMmQ)1I{oDESc&23KgpJW za5sJm7G)%W4D8Do+a(*r1tvC0^?K`c8bAulJtw&w4uYx2AMmJ)UrCZIKp%NN$4N4ql%7Xfsax@Ng6t^KX-; z-z~%sE$|6uA-Vbd4og%Ya`_WRgep}$;iRQ`neg|ISLM1l|LGCN4s1_j-rQ*kbP)?y zu<%K-e^k2n3N)mbXx$u~A)dkDm0N`S_FutfNKO8>ge8hBWRJo_V;*HQ zEI&ceI)W|WwF77eKk2=F5&g@1JlF`v#xf@G@DEtV$%MzzGKBw|u0KNUo5q#nW5sq3 zlOrW01G}kK0Yo2@&}Dw9c`h3Z4gzHZu%4d*#op@x#G?;7$KdW+-d{4O9Du*yHbBDN z63Os+&EM+`7Y-Hz`smN=k2J!Rx8TkT{uE-l2kkY_%Z%w{ZPSElUPixUYp(kn7T}M< zKwQK+UR~Z~VP2C3Yj|V$omeMx2dEgoM)P`u|NhK*J@vfsrAGDU)%>U=ygz+j`RTZ)optN=i=_q%P}rO^h?ieUc%oFUkDwR=$S{wV z6;)4(=b0}x=I`a5*Uz^LIj9wac{QkXt#L#69D!$^XfH=|+~AH^+Mk1k6ZpYU zu7k16tMIiO;Jiw`GFUG)n3uVo+sA9}zsA7%5_8V;;@Dzh%x5gyyj&ag-P^L7ml-F4 zyk%L%qR1HI^+XGsS=lF^7qaq#;bULD&+Fvt!M3bR#(8N$AKA((ZThph@#>755WsKg zNHwn>Qp;BRR0|eZ7tM?DLMKT_lJUaz1oIt)5HO74Q@b;n7vjRo6ySSuFERH!*-h+u zo&60im6vS1^skUj^3iV@!TXumyiEB6-GkHob-enfBbf&Z)4W7}wq1B$m4!XxQFfys zAZlD8dFSOeM+$geGv~&lB}5;uUsO1|jAMgMhs<9seWV_v$WS&Q%_|Dlxis^7#7dGG zEU8lA$ekD4$MMO>77#eCZu6=jaadh#HXCTP18$V+2BrP>+;p-Wz0Rr<*AFsk+ZF9ZI&MUK|l)DGs z>;Ng2iDWay;RM*b<2NrTG}_YGc|COuAcQQ(c>#O1Wv1-tOgAqlpOAZA;5l%WaBTB> z>3Bd+gwJa}jXrXLfmcPH5X>;M$Fc}&FMOfRc{Ov4`$uMOd@0j$(!4r9G;p`W6s+4I 
zS<9iaXkm)Kfqa8;Q0QO-SajJJ=w+dDWk7EL=Vf(;+&O>tY}m^wKV;4eYE~b!8LQ=G z$T~{R3&9>A1Y12QWH+?OsdCtVUf?N(1VOu0(Pmx`Ba11L{~ru5?sxC6a$eS7nBq5n z7>_R+mWF!<=Elo$WYdmWYl?j#_R_XTC;b8SPhyI*$$4GDLkcckGI3t6{UQLL5x%_i z_>dH*P@glcn={*v6O$(FGdZuKV-eW? znU|5<1)3Lg{0O2JVy0QqO>JH-*U4R_ys~+TI1nReyGSp6V~1_lXTF#qSKB*E(Qws# z7yMF0EE?LAa^}T6BQyO6K>GmWr7^&)Bc;kSE)q58b!cYXtEAIEA~tv(eO~FgD>*_X z=H>Tgj*8zfukT-1t~jsLadw^*pqpEVidptQuN{9XnyW9_P{hpd^1WI>>uBBYxi^Zc zGLGVZVfv{^M~MmQbY%CpjFPR8IIq!y*U;BM+q~K`1kAO$KLh^I&#SQi27C!v-Z}RL zx+P7x*;V=-0iYB*SmTwm*D}NeI>NlnuTdJ^d7=(YUkj6<(y-Of>uy^J=)7+Je%A3X zDLuypJ-)h*dnVOrSErYGB_O|SXK@bn3`S@SLL0DF@JloEQY^K}<4xoZO&lA{I32HU*U}0uXUgpi|;gt zHeQ^0LncIonU}G@Wv}sG8TRzg0VXrYc;(~o2?X|xh^8&Zi}6M@kzrSxVgsdDeU8^JZ(ru{H!f?wjmsD}F*+)yL0ZCG%a)%|UAtO3wLz7}3yX%{Eav%4 zipQ%Ouf^<|%F1{Rem~dG%jbEi4UUY=YrxCtnwJ%C zy6*0NUYY8pe2hlX?8?`>+nzaYy5f|lPW8I1YaqL%IPr`flK2m;O-X^v4WF ziWxaCBvd$ZCJJqyMh>N)Xan?mxM*H#)`JzA*CHO6BJEooID}L8Qn^9*+_&q+X%o#B z=lLBLs926|QfHI#ydZA`M~9iB&3*Iw6lQge^on?i79nE>2=G)h; z{ntCMw^&%^ykaZ| zsD?YDTe;!-=Ow0ZV}rye`a~-DUMQc7~mD_nAbqU_sP66Fk~(rCDP2<^HTo#Ip)>xdzj5DOE8zJ zZ@=c1Kr)$3tK2sP-1O&V@)AahRG1s4fmtvzXQg7jtRkK9qN&{r;xkv~b$;u7E12TV zO=9yR`6@@xE3tyKgN-&<8ZW(o{Vv&8gYzowdwHU|;%SA-q*&|86BwhM{mm;SD#Zei zm+cO^tIoMPFTZ=CgY-c>n998DJ^=1*=d$x6x;EVMA1!QiUe(WvxS9o(s7!)tur;IB zQ~5uyz4#R`60pDb)zRmZ26Qy&T7^w@kNF`0{!#I~8s3hhMH$DJQW!eSc};C#tK`s) zd5tn-Z&g_M4Q#Jkz{u@Axf}DUr!URsm0~AhbS;!{UaGxt=Xo`GqO-UoXN;)Uz?>oS zaHrXO&^JeJnb!l3(~~)`um1j;Ve4o*FIBXgTz`U<;Pp~zUVh?%lraVue#g7h1fJLF zH6oQ@qHQLB4ZxivJ~4-c zsgGQIcI_C0iStr2ESyBt0>4n*J&!lqbl|QC@6YR+_P8~#NvkT_ip#}oUUTMLX}{+r7LN?U}UjSg#Be+LbvOMT@*a{y!LA~Qdu`f&dbrCVB@&W zp7ry3l6SadUN5|UC+L-G%FkT(6`Zg zltYFz3{cFgJAt@j&Z|)Qu?h`km=}DL#N`Wop7ywNdKj(Xef9aE9w$4m`W#41dK9fU zl0`v7%O-%z!R*+EVIJP7>fGmbjMuKtctgM!;S{q^O4F|D3PR|M1@GqdMqlT&x1#O5 zN_R}se`0Y?67-Dsd+ysszYK2NXbnyfle5*e09zzdJ&f zUbU{g0gU&N*|eQkxruv6M}4=moHxvK9@($ zi<__M4R-vzEjhXHAPy_@D(kntV8G!v;JT(26|{|OPDTw7CJXbjeohHwi3C*ktUwYI zqAH^^DZQ%-Uh+S-k_4g<~jeA6z;rvJC`O 
zo#;)S*Utwv&ueGsu9fhMG%vKPux03`#{_>bI=T|Gk5{A0y zN{$yt$P3AsY&+7#O%Crn$<;UH|$7Fa#HcU!9HyOR%&&}(pBT1c?=`UfJ zly!&kp4ZRNqM8S2$}_J6xOL|`pGerrd3E5kp|@l>URxa(4F=~ZB zymOtGAr$PYR0_eTdHEd*rscp4{uBGWbl*;$*5mw;**O#{U&_WQJ46%zd3nk=#D3ue z;>E1_Vu6;u2KJq{z{%k2rgvVp{I^YDCbVLTAaBS4U?Cb_=Z1{aLfZ`ayin!`&^@qm zUItnOqK{)8n>-nP1aq4bf zFM&*|d0r>K-t3sy^RYK$`aYo#5bwCz{5x*oOaSNATFQjx1={fB-+qkdP84&Y@j3s+ z@gNhjaSnPcIM+KHb@h2GgGx1XUeWvm&QFIvzqJY&bzayolo4B<5ZlYBr(a_57WxpX zF#|X%j$n;~0HJRf0Nel!02Je1OP)b@QMq|xVN`{&IJ&Y39sq?gG;3T`WN>xom5x)a zFj#V4Upo@OrgYj~x}(>mgtzpY7gUU!FQ4c1G0?w)W@|O!zBrVO!H-WpFPQSD&OEPc z-feW6*PEtFC!N=GrqRpi^_);aWd_b8o88wF=4B4H-Npw2O6Jw@D@@J{RhvL*;lsYb zw{d{gS1+;E?P?eX)_I-dQA(Zl^j~bN4iF(QbbiXcB=aKBm3{My$th`h%jB4E!$E7!8>?7~9e9l4yGrw*)8u?0qF^R|@LtD9536aZm&~{F~zKDVmFZ zuJJ;OR>ULZZ3<-h1xPL=OdEA^*+l?~tLwg>&0ad}@$~n8Kw+3T@ zV4!>w`!;B98HPPgF&FB#H88`FAtODAiJY^YPFdK!4qrWZEl#IY1ZkXHVA`0Cm#Yuo zc&jxJ4X~wFTUKS_rd2Wb(9-It!Jdc(Sy;BBHvgKDyP0$S~eOJ&tYuo7@T@$nj2h5s&9aV-B9a@AzzA%2QA;gcXGiPfqO z;4LwhF4Yd6vCI(oYELH%11wpk9~gf{m2CZGeAS@0&XnEbl|oT)6}xzqeFno_%^oc> z>IM`7>T30M+2^X6i>KkTw%A%EEZ@qf*y^cbyu~V=^Gs@Gj(%8rz7%q_s({^SRt-_r zK@s9FfAV*Ct2*by>Ny$yb6@qa=%i-I7$NDdh_1w;-7AV=S2>b5-+p*)OPy z3h|efB4B(82B@M6ZhqmXYTSO=X@mG( z3nNnJ)4R#3x2MET?O0PG-Lg$^Q%-Xzt3hj}r&_x-`hEy;G^_d$79_7Z_t zFCezm3-@dP0vTsA<8r>KPD2{M!w+ zH#A!p_OD+Uhz7KQz}@z)t`pLDKuRgG<`fm^-m-V-lOA6K4#lRR0{Y0l#Vb#S@;i;MPr$QuDxtZ zYi?dWyq&^R(5fJ!j6C$ilk^UoFXxQky0)Q=zs3l0dKLdL6az7D?hz`uK}dhTZtI&^ z%_9f{Izr@~|H|^xoX66Wmx$25>}TG;zogKY^w@7}$$vTltK>(q*gAJt~#TG9PQ|M%CMGd{~T#Md7XXroYkWfsJTM8~k@gr1XgaR`yW8?Z> z9W94kBOPaeL?7JR=m$`x@l(2!6#;Ad72pQv$^QS${DV%`%Pg1GFa)Z9yYQtt6`_>+ z8tkB6a@(Q*I|y1|U0DYp=r2H;9-Boy*b6>9R1ac3(?c#trpLMr(f-cRMrSmu7pk4>0(ML?Q6%=5H0m;VS zNB z|Ls3+m6vT?C!T`Y2^(J_H&IE~Us_THtt^=#Yav4T%AXl7iA@}gnJ0H8D+*LYIvNF! 
z6pS##&59s*JEG5_205b_L2@4mea=xDljV%3`>mAL zu?g_wcfc*=MR|DJP)sSLm2Kc)3+eoZ3^yd|-uu8hYK2Rzf(1PB81!nxiMc&^a+U~n zhuj(NT72NU&oFZROPFqL_l)Ux9U!OIrxoYIpzqtN@59oT#n2E>eLy=uu4`<5!{FRa zzaJhSrELrbN?AT1{~8~Gc*}3MOnK&^Q<*tt8r!!NmU zGt14=p)0tWzdf!k=6%hs&ATZYy>uJH^`OjmSWwyYjC>X}A2W+77f}mU^p3cV)cFcK zdh%s==pbbQkqgxYK6i5odPYzvoq-E(t-*14+H;^#sVAZ%Lf)sSn4rL{#w&s@_Pr5i zjB*Qh(XCu>S|@_<^4hWo!#^5Zud>_d00>4;Tzzfv<5}tmf_#3%QB=9Fd56Art)WDx)IlDv27IyZ*D!QxZ}I5LHZt~hHpsWHW}Mfs z*K44q>!BnwVEk#%f>d6&7d%*6Qm%2bklwZpbUFBtXWW~c(OHHd87Eq1>_Bq%9bHguj_8qjolCx zQuGS+>%r>NiAIzm*s?@Gx2RH>6ga2-0la&4{ma#jiGL9}xcncz?I&OW5{;9AsTZ_} z14MHjbuPe>)kLF8vhO6|9MKsA7)6WI6TdeIN3UipV=Gq3@P3siN zbGGsGbGG^ zMZRr*GYBI4A&5Mo1Us%DqH#X`D8yb=shAwqS9}fJjsyVsIR}n?XhDh8r3L7(j7wL9 zRvg)uzl#;d>C!8Zup)bS?801Kwjk;2W zh%r_Y0TPpg+kwzqgz7gAC9Writzeo+GX##Vkkd0-m~uzX2|efyz;vx?bY$$tm$-d%kdNny5 zG;mQsXbWU9>-d715gczcC@RXR++@qUn4W_mzXF24*q@V-KTbI` zu^Kk9nz2c4k3D#yc=`YYr&49+9cPjl7+BGqr6KHAwWP>9#A^I;;iyX$qnu5mP*su@L_Wk&SOw0=@jTt{9ZVN$gCNap-g}b{PEXVe9V>K`j*3xV0xrQH<0j={SPBs z2%E&nf`^^qLK8Cym84RXwcf+@NSqdGZ8O1`aHw!?(HpmoAry*QzSw_!i zrYf!PVs(N&0UPy0zC~EY9E!6f4X4su0RMuVr@&zX2NM-vYCGVLeHmxBvQag{nbGnaMG^{05w03ubKE%T3UXT# zgB{|mw1hXqUYh;coB&5)`(H-BVniLtSDPodSg+wB!^Eicx%9sPoM>1QEyjD~r40f< zsL-+;{`KPg=UfR$2iRkn7PwUk<)4KDkWsX%8x+5f;T=~*`z~)Y=3ER8MdE+B z$w^VuL1O}pb5l9K=`9GN)84EbjK|1RDx4-%Up5S2lc&r#&fi}=IpN6~;_%+#h>Yl* z8G1jplOt>>kC-fiV=gG`CQNQ%Kgz>Ff}=hZ0~@g&gl2>~obePHSbXC-1CZg6%}`b{ z=?4i2h>lK_c?bwp*cnlfUD2HUx?-DoE(fxR7El8WZ*I*voqGQ)&8jnD`(*qzN0os= z_Uk^vby2jF-c`~Jn{x-9+vPI5zni!`ZP5NcGp`F3Cx1uzEek5Ar((fFw+==88*LUB zj+ZMAXUD6?OEQJx(kOdoD^8GzvtmsszL7k{{_k0{w6QzpXU+vQ3u{c16p8kD;oec< zyesTZHf|x9v&kKTQTD7Nul?PhntrRZi6iGGKw}9si_-flY(Dw*E9Lpu_G! 
z&L&$YMg`3JZzV-3+agfM!pe~TJU%5n$yL|q0brIqc?qg0j?YJ;5B*G?OqhIbhGLk< z=*|#{9OPIX`l*=#4$-8#2@ktW0^s$Z5QIq87~7wq=*t8y)+d_{v^iWLoi+haf_zmt z^?!PH`x~c@2wWq1CE`DCtMt18dt-h2<@Kj+tQ2I`_vgEfItpM^D5l}UzCh3R444sh z>UsKCZ%qYYQNsGI0;PmqA>K)V)eHR{z{G7m{giI2XG9z^`2|O9RztA!g(G4(RJYIP zkHwfNMYC7!T9@deErg5?9`#rxxk%~!EyJw;9atUL+8^EMIMUyp;UB zv-NdlkTMcqp7>fo*~z=g{f$r+%61DYtEgVsO|+V)!f(5#Y6r6rEjG2_qDhF*6_s7ul8G(ZSmXM71ZQ2HO@DX1Wd^!}b-dz9CKEWyTdsUE6rh5!hQZMIbO zpQ8S@&n(dX43@%$PHm&t0wQ15J(>xC6oh@4E42viClN`$vW`e>udhm0)X3oAlo{bp{mfb@w|?0z$F=U+fr$%B8YUPJeqP;x(gYzlwQhul(``j9wNuftRQnX$~XL z!@m32nE1Hq5ZHeJ2T2N)@ms}l&LP4e7yh(v+<6#vV=R)KS;&hmb_Xl#KLulk zB7|(_pQ=F=wBJobdAPrY9u<{w6Rt_*UY8}VrV;3+B14y)_$jN;;J62aZ1^0Fdz&$v zh;Ts11&{n*ef&RVVZ3x%VLps?|35GXQrrtQf|~Xq4lR9^DY*D4U13RepiW21bGbpl zp92tHUxsfsu}WF47>g@7n-tL#!m=t-=@5+ZBueDq;(C2wCm2EPMfiLdQ+v2duQlp$ z`Zrmt(K9bkt+FRW?dFwXwgNsXMX7iH_O_rb9Hf?Auj3_OQiv~T_+0kFgnIdl11yyn ziuX`Cd?`O1ILUQ+}5A8Jk*%5%p=*R5k1G* zv`^rdi5ti@=tI+FQRebSCGSuMpoG+)uBBnIG`h4rVl!3!Q9p|l@WfJAh*Sv*=2C>5 zsAUkH?kVU=g{mT+Mao170CG_x5E8>Jk1L_{KeguTPMqh!I#v59+LkHC)30`o)ahMO znj~8K(?gdiX#%Ps#nT3$RlKKi1Bh2^AsjdI9+!oM#Slp~>Bxd@H?I~6^q{^~ z4~fZB-d;>r)aB5W{_q)ypDZGTq>9GZSnxIaiR6+zr#h*nJq@xla=JjEkT%4HbWhV) zT#;6yS;0pHI*=Q!Qi?$$tQ$&lc#zB0MwfYZBun=I?T!GjCak{1UdoB=qjXKqc^+*) zEi4bs;zJAoFkhup+xi@-pup4#?upKpLjjL>vzJ_9b_WJsEKK4@TR+lBh)|61vn(jO zRXxS8bx{fi@)LER(?mI)uKcuEDLq8!Y;+v49|}R{$3iI)vEf9-EVjd~9grBZbRf?v z>TVd2Ce*{SBwq-F?*!+*(jR98%sLR}3@P`(C+24#%og&>KoeySyirNhEeT;|Sh>9j zIZn67d1;mIEJS#T+I7{$*t3fW8Kqb4VgVp<&ry0bvMYe44^u3IzQFePbQ?C+$$M??*x!I`$!e6!oZ@Su55XZS z7vA2UP6wx}(6wW!$QAqtDLNz#P}&$t2Q0hga~whvg-yRwqkE&p8w-|tVvkvluZOh~ z$f6+ThjiLz8g#$(CMr9qfHl$jLn1Wh8@d?)P9Zats7^ZqP|Azz$nE%6=#W>y!Z= zi@mNT`Na3h_~uCWT1nt3{|g9D=dkWbK8ON$E*v1jl*8=ABQ8h;mm75lq=buSmhMMO4o0z^uAcZJ5%(nim|Q=W9uUBuRsNVIdT zXOhz(IOGjDDs(H0InnMGFKKl%8@I;ex26efHrd*Z_LI1Y z6qD2}ez;RVy%ZVWRYXkj&*`x})9QY-*?8JAto1?S&!QQ!L0ftlPz*^8b%oP$g?9pC zFbO#4n$cm=e+K8 zr?M~Zx+Up(vxYv^#VE>NHq!|&Wa@+1Z$LEZe!Hi>keNNo2YvU;4BqG2j-G_#^Z^}= 
z7`;GVf#2tN9S~)C)@9dSk|Hw3*F%CX2oEl5BIGVgL_|n}YtDpVy+0D`i@{dBc1E}_ zC9y;^{)RuoIeBWq$GQ96jv5Q&wcR_^GnJ)|hahJl1-cQ3T|E*i3fex$Ds`u1kXof5 zB>k~@ehMKaX{i*$L$frBb7#P&1eFUkcF9S)Ix7c;=cO$3b%5C zCtQ57kfB&HU1jpghY!kEjnZX{7tVnw+CiB~IqsBtj|dA-++Ab*Bj@-jpsxCDLkmI2 zs_r5O2G-K5A`dG7R&XeuCPB9_xm6ZqOxpZ70&XN%Aa;i(iLpcoY4xcWQf7R}Eh`D_ zX)`vvxOzB%Ww>YC}1BzNh60g?fwL#+#DalQ<&tWz{S73J^nv_7tYr zLKlL4QsFlL`8?$Ujj+E0uNAdc^u3Py4;lns&>yYF^5FyG+B}ONfFz9l1|-+q_NorB zZ)JSI8M-ExP62&b8P*wwN-&a``0Ua+e5A$gNcOzRbd#U4k;!n(!`4=j+ zTp<-Q59WUAcnhNlGi!-{mVJwYGgF_gvTiAsg?b7CW(FG@R^99BgRrZpvk`ra6W92lZDp8tjdn(Ip2;P-e4Bdg zWMD{P>l2CK=Y`@Xa?^{rP0plv-X&XHSb_mxF7%A@;kcSJ5F&-A`$U-9N~G*~7K@+} zLCi%=gOnTCaYaX=_PrMZU}1Ss(DCl6rSn4sIBpOmRQT-x1a7i4YmGLD5?|l=AT0E8 zF=#MJk)wD{&+AYncYj}W3}CDE8EZaPP(C@n)QnNV-i7tHq3~m3dmVN z8lQ?ZHpS&1`P86qHUu^Xe4&7m3m#>=y8P;AYlXjD1X-{4??Kc*%2)+%ceY6M*~}wb zG(2d%_hEI_3@0V8pYQH%kOH-=WZ9>8M4>4gV@Tx3{1m{u&q z(u0Vs$`YD`kA;UaBrUMO$HdEK_gZGIUOeOp$ibWYN94623M1D4hYVhPE~4GQ zSA$K~x|Y}Ei!sLdBmt`0k2jBy35-pSvYLtr0~>Af2s5wtl+|fPkS){{ClXFLvYN6E zERRSUh3^6jR!Y?Va6lc<{%d+v7?v|h?9F7ue;=OcSRxN6WAUI5G4lNc)8*NCxz*IR zf(LD9U%yaq0T&Je|4*^1PDiu=O|O-BzDy)~!POVIX`M@w2G$$JIlNNxMb%-gM-#ir z@O=}0!kMxf>uRhDzx2wR{A_fk za0}X354jmcc@}C4?_a`>z_1qlpg}$K?CyaDsYBk_wzfW?kkR*8SuIa5&cnHiU<7|; zvRNmn6SX78IsEB<$ML`bp3WJ6_>7;~wrjN%=08+W7y*@wgJeJks(UdO2M+I8R-FZO zj57h(B^`om_I*Zr9f^|C9sKNs*ytFOL{UgBx3H~wdvfte;;u=n)F_=sc>Y@ z-OwkF*S%r39E^R0Q%}0(#*T<`$hS%AfIT^~kRMGlMO+}j9^_mm@7PwV+>9vgG7~To zyJ&g_oh!yPv)n5QskiAf97I76NEzt((GrBJd;}C~kwt#LU{d1M&o$_%y`cD16k!D$i zh;Sa+S5)5F_#$DpDw_kxM162POez()CO}J-kxVlcZf^=@ktkC@X#Kj$|c>fOR=!L^{{P8w|W8NIh^2UCfwQi&~dhH#o@^cthwt* zQ*4KQe`zE8g6N4mWq?^`UZMeWFE%a`m_|h>yxchQGgp2zm2OA%k_-Woere|!YrIXhCy*a(f;gOCQgaJ}2?QLQUmC#kZD%H)5t zwgY@#CotMOemY3xfzs-UK@!Ym?>zeC1h}Dfilh(sZXr1kt_}b1r8AQo`l&p=9jBTK z@nC4#tp36x1eIL7k2Ng*qxBZ6_x{FjN^<9Wz7T+Hkl^SB3f`9OFEhbKMS#p`pnPV5 zDs61!pNNnxk=6M*Psc#|N_7zi%j>}k!-ubyan(n0%Yp3V6+r+{-~E#tuB$`a&&DG} z){{3(?ZTKJOV_-x2-EZ$zZA|*#zMCZ@;@%0Z0&3 
zoBc`JiGVlUmZVEgEAul*5K@u-jNt4h*X$RJ%k>N@DG2F<*caJalzpcBz2Is78MbPm zjuXlI@laIQ?avJ#rxJ_sIK2Q-d_p~*H*L1h8BPdMd!n0>*H>kLh@Oa-)$QaW63gmi z%kT$fg`_x_F~D6uZXVgKNo$9x?nxRwQW(Sw(YkEzDUrDxA~lB-)D9T2we^_;KkoYe zD}au+sra8tHMT8OHgI%5G)XqCWzs+pOqUFt5a93+yILb$5emX<#=St7s9N-ph}UP# zNNN+%=~9@2%&tvlHIjyeEQ=n(hTVmk29S5kZv?!1rvdFGmO-jrgC~jg`_&O3`MHQI z%uxhIgr?uJxgq3}6-CKynZmlYfM_!h*QmVlLgf;s_gjlBox*@it<>2AY-9oFHgv4{ z&HwKG=N$UW&99BZavYBaUkzGY%W6Oy{fuvMAb|K1rFoaB8Y({^kN;F%4he?L3Urx6GN4OU9mLx(vs!%H7m9N<6`kuTa)F*t&vr0&)Pw5IqaP40vr7bCq~6lk@D5v! z2j~OeWepk&thA2cx&c3o0MktCDK*2y><|{(W~*=hd;uY>cnG2jpcItjabxRH3)K-E zX$8+y8xG5j(Z@OWzlFab7b6qm_DZK#l^OlZg|op6Nl45i%7R@#J=0J-q&`6xLm}o= zgs!P+WvQEp62)_&5xnJztXQGPE}5cC8|@)1q4~23bG@)&D^@T<-$ijLeJY3N6A^qx zSk(RexBHUwK#5tx%74O*A5ty<1 zLVWe=_rDl6DzAC#(gCB+pMjcuNx0ytu@*z=cQdNNEQ{z5d~*cR55N{|xD-*cakF`Y zZ*Y+!6EF%KG|ZtU!muU;Pj?dT&ecGM;hXIu*S`U8D9xQU&4un zNfZt+<8~0_X2b49=Co9Vyh;k`$^Rtkk6G_fkTDcZR879Ac3(XRaEqoKT2$CLF12SQ z3fcs=u4#OTuvo)f`A}c|2B+N}6AMter=L!XU+<%PzgqWNVz}&BT)*0IV>oTL+i|gC z(7s3r5EAU4yR1Xxu+U&Fpryz^C=>pW zN>%O>35PKIg{W~txY2LpzG%4j01`Zh5f=As31z(=0#nn>A#P&p z_;0>FAQhV;Yv@epPED`Mrv-4J!f}m_TJr%CQ5B+FxNI8j|DuWll(u?M6}Sl9E(q;Q zY&}BwdIzE*3w$$2f`y1dXc$dn>=qKSSXaJijOI#8 z+k}HJW3QX2CekZW&9_?^)5H-i4$=qe+~qpFd1ESB!WRnu?X27qDMZ@F@LUCn`h;Y7JR=q26yEqkX}=dO#d-sZ!Kwg-es$wSU3CLISz$^K-KsNf?Rs@nLQP1P(F|yT&$V`cY`Z#CFy*yorlI{ycbE|p*F$isn zK+6U+?k>9FhZK9c08E9Q0I~nwq57Mx(Eu}F?JtPrx;&H5jr8S<&R6&>7-{ClSuLSJ@;VH8XRRC2A ztnm1e5xNS}xvDSE8WO&J#i)(ITEMy)WYIwk$bD#R5hbwxFPo(qfI$Q5Zt#m46`7_5 zruMIVl}*MSgy!ZrqO4YojUjalNaZg1f;7H{`D<@_g^>B&8VTUA1Mj7>Qq63+L=fkRELaq%4^$+GVM7q{!j6MK@BxP`h=#yfj!r?Uz+Ki~5IB&PN<*3} z2OznMR<#UkNZGona$T!kP8cH*N`Z1YN|s5SEN9tHh6vs&BijnKeIm-%K%3)1oga#G z;`i`j6`$pHH1ic~!Ts@jo9>Pu-5n&FnByVBl2 z9_L)|JdQ(1XHKA4H?Y-U0l1w^w=BPSfpv6AG=?`ODfl+cgU)$TO($@xgo0qqVVZ{4 z&B~LMl06YZ@wKBY=<1MqkT8y1M(N;|F03n;&>&BWQODIX`>#0|R*0V@VVbBVY=!>d zH+oC*Wr1zrutyzZDYmg4f)~)dfP!+%ez2YdoLIlHFDn7P zfHnP-Z6`Q5vFVXZ4%T$#Py{D;vY%TO8(}#EE)VMIQ(8*2GO`W{N4{l9)J88`*IuA` 
zfyk{(;%ZR7wJoq_La8ND#Lltf_F`CfjKUaljy2N8UoJA4k!J26@KZR`__^-DYSq9cjwznykQ2^-7aZ{^tB>_m>rhbrP zpZ##R7zd}i^FmEW*Z5cqN~Ex+rN7>+J9V%|`a_PT@Rwb=?Pz)eyX|>0rGFH?VU zIMhZE(l1Gmj(4zz>VCKmU#m&-BXY5!0cC^1VJN~4($q_jBBl>&D6$$2ilqT_aun%_ zJ!*_kSYtg}e|JL2T0pW+=KgNF!kYIBYg|t%!kYCorfnJ7jtp2+A?PKPL@eXY5e^pC zRG%m=F$!g^ptSCox)&+|@A1$$EL_Wxr@LDQcPe>Ss-~;ajhc?Ht|#p#trz^eL(!*Y zEf`fwV2$;RkE3Wa@#cIw6;^21aa4f5}R~XCRp?CKrNn z7$BJ5(NON{Qm6iGfi(}8(=MTaHMD97ntYk6()$Cj4ve0csmS-)e^AIKb}R#HBF_j8 z)~v>2ijn)Pr6rgPV{!P069_Wp3z4?0saikDCHE~*40LQM77=yp@3gAV!kVv-Rw77s zfoF(9H{IX#xG$63QPc7y`1M^@!Xq2Gd4VU@K448_nC(J9&~Y~mG+<3H+@5HxJdr-# zqro~XCJ`FY(vNnO{8@J09yfu=0d(CK17hjYwrU{lt?hZiQI?|M*VC6(73zBDq^kbj zQ-7!#aJ%I^OO7d=F4-Xm=(rAwi8eaDTdktW)ale6wRMvl&xFx+-Ym>f)eO^y`s-f| z`vRV6h5o5{Sqtk3tdS(lcx#u4>PKfKNf;}y-;cnW2*$`)G6J;_(t0GTWZ6C{E$3ER ztA~-c4v@O+7-dH3h76Ca)1QTsCzoW#TT7iluqJY?Op=07lW&9Q!#6!tSWf~mK(JuF z9Jcmf+KR4@#Z8D{j57QK`@F)9a%$3149BaKyY1+stVaiBktdRU@259 z)JjW8`bWe7fg?G?#k)qikg&#czleMiQ|5iHSxkx zMnOUwn`1Sg#F}pcv{sW|=a8%Fb1fdlQ0wqgQH0$Nz54<2MZzj9j?}4az?3Tkhn3WlRH`n6uFziVkjIsVWc9Ye-ko%vHs>%ACL50 z&lq{YWy^`^^U#R9WE~iyeybEcMk^)q7kO5SWEuKjfYq?pXNC2& z^io{5tjG0qaG`&4b|+!864a#6b_GjOc~dk_7*iJdo^z>scZz5vT3$#FAVSG<8hm?l zBQZcQt#o%i!RYRMds1PDL)Yw*o(x7JM#gyjpU|!agCz^$V<{L32ukoldBp3HGcrSJ zlc19v?`;Tw$uZyp9m`=@3NcuRXGy9}i%{@UI7yZk18O?p!aW`LQcNm8Cz6QH>P$(7 zuJmUibH-a^^bt~IdeqSE08sGqsNK>r{Z5u)@e(eurnXB3fL7xj` z!e60C3=pu}(`7vhDgCbm^;o`fe@dzt?2 zuC6qr$TB>0;@le){`;zO^aOu~sUFj-O3j87g%H*#60}18_`W~jn}Y8p#|d)DP7||Z ze2?%%VU46X-mH-`w+@s)TfNFymP>eqG`Qtx&}!tg-G&Zlnaec>+UGWQ3lc z7d>Hfc0DT!AGYT26)w3MK&Jw@p7WU$hQ^g}N&jKxAK7&29f0>}mxD+=OV?l>5Q*5B z>J~xPyoC%?%dxA5CPaDM1Qvs1^Tj2OUl~=klLZjJ{Wu8;K6--BlAQEquf&TkS>q_B zK$Rn$7@wJSvK=@f%V`~)!~j7e5Y|Wq zi*ZQs!T%eq1*HX(%5>?K0Ltie__I6x`@)tOaA8n_UngvT3L|q;)rI985^nv%3z0T+ zr-uW@U;tR-Q^K15OQ8@}BSyz`=+E*CXj>`WF2;~>j#)%m3&W)%8TgjA3PZxe)8DUz zAu&JzD+Z8Til833^yp4nCCh9SiQNv|FTz;k_?A_o{;D_tS+E)n$YPCdd!J6Xprl{pK5Ca6VDWtlQ&v@xF&?-`H z;t9Lv1)A{_h#{+j>^CQ0Fc4Etrm$57Gliz-K5!GdrE(g1C$;9YiofJcW^GNy1EAJ*%op 
zL@UK{bF$aMQ4A1Rmrw<8DICS){egIR{0Ju;w-gH;9*;fe)W!KYY}1Pz(@UwzdtpFj{Z-gpjq9AB$m73=rJf zpnfbuMHscEFeu!7=Gv$USr7Aw)(lhHmX$ClA`8*f($B!BBnJ0Amh*iv2FxSt;OIk* z#xp>$7?Vo%h3d>gR5yY<){80zfJ7L_O5>4zfv>AO(Wl|OHFhRdgvQ8-0`YFIN>DzK zfz)mtxGz>=85Sh$ZVmacQP`YzHwNQj?2gGE0BqCb0FW2f0zOw@Zd+0~<-&Y;bMc`B z8@F@!xDqf%?h?mt{K&2x)N&ZlA8uRrM5%ruthublfLFNUJF>yb!R*Fy1lyI%^$Q%#rz+w{gBCtKUH30M+}C*uvm z5dEa(*|+Hc7ftOj*A*>?2xeOn^zG9|6W1DoWR^(0o$d&h3F!Dads zPHpy^u*iePI=NcTv>cy8|9-yS*1@u6_-MlIi8{k``+AcY5ZeOQc(W~+BJuF#yG^>+ zlYsaZM9Qoyk$4cg(oZGz!4@L%R7_30+2ur7OV4F})Z#dLJbGOdkQF-EKs@zM(8VQ3 zSB`ir6E>+#*215J>`Ngnta%nr9tvMVp$Kc*f|w@DNM*=DJhNX*T!PfJoiOJFl1OMZ zDjdK?7Q=a@|D%2V8xlcA`&xI82mLP0U_H)rd%E1d5wCEhUXLMl7bDWK^8rCKE$H&oPaOB(6-&6^p zQui%`OE}?20Vju>?GeA&P$Yu45 zVanwK$Li7R+%h3ec*`)Y`bKs@yl5l_2wnoo#stKrsPtOQECm1q5D)+W05D*XaAZob z7>W%P005P~V8nV}%-R4`FGY)?-ie7p@JDb~nj>J*I{P%45;ow|A-9T{9|i*iRv;Q{ zdwLlWYiAZGy!^WI6wy%^SFLYa!*GZ);B~Pq)h`6ugeM4&TSR1{JoiUL2KTzyLlaFH zKv6RbeXEhz(&7*#%uo1MRBKH-2ohO@hgzIZqOG7$)iWE+MR#lnaR;7vl%NjmFpEoWJ^v|4`NH?$t|u>mca zu29%i@dP9UZ=+z(Qo@fa;biL_7o`d**6~{pVAJRxhTN!$rcQ0G@f8I26&DNUIIU6H z2myU3s^+Ima9#*KfvF3kYjjeCRjRwR2JwFe!OuCDv#SUkF5{~~$su4qGV9tq@&@9MW#dd=gT;ibVEsIvGW|(Z7 zE!tG6>(^qmpxU0QrMw=*5$On8T=wrG?3TD}l^N98C#00#YF*Ju3B!D(*`}BScKN3i zu7pnZe15u4a09S13DEB9cqg!u8L~0SgJpJ(s9JPG5f4Vad*mbQqxQ;X!3eW*Vg9O7 z)02_*5RBbMbx^LYn`;`SBKX_Cwdno^NwrMim;L@mS3Isd=}1988B98(9Evcyq;bP zXv88=*N_iso{$A%@Vq`KZFuiz3PoHx@*cT~x+oQPDeNIG{$=5~DjG?OI8LGljORze zolmypmT!?U3;T)R(m;W4tovAG1L|WFAU9{-gsd4l(%v!AMk5}Oc(L_~)4`#T>^O$D z^}k=Yf{Na7Dnq?d`E8OinRq-^`3h508ApHBe?9A$dHb;vAL<829$dQ{qCW{XfqG&z>~Ssw^FZBMsG@oC;j$Ebslb8!L?Qt_ zk+TMB=yK;ql!s`lD*;G4HiLH~?3M1!uDB?qa<{-7&=f)6iO@pyWuJfgnAb2siGjc* ztTWb<2E7m=5GZgx0N&!*#JU8?gTi$pXci%pJ4evOa@euYLxE=>p~d68f`yuYFk9|D-{X8b z0$GQOIuP(gLDs3L*eF8*9{UW89FG_Ob^}{LBeLtk)wu&5$E{o__d;4 z%>cdf22A~$>ZMIt6;ioYN~98<<7rL0n(BjtrRbg zQsSI>Ge}h^3im|P01QZbzYLd638PP;Yvm7cCm{w8fgpYQYKCUwI77CtDuIM^YO1HL 
zP9GBrTBYV)pSB-)QX$aYb_du>)GMS!NnyJE>u$qM&gsb;ya7XX013;CQivevU^}%>1CS5Xk?@oh=YvgG&ex#-K6g1{DUG<71ik9yk@jiQq}~_W`Lx6U5Ykjs~m?^^<$W&7B7xpYx0u>>Kfm zNh{rrwiR{x*uyA!9YR1^MueBN%E3Ufqc8BT>jJjGs5=L8U=e`ur9kgo44QV54#|r< z6zXrHGw9j`Plcp;e3X_USJy>L%Wl`mkt0ZZ=G*q&+EdO|44gQ^T5j1Ju@iPiN0nxT zr*!}Go!Jmku(R3m&h}Yref4!)7zrw z734VUz(Y5}cy@gy@2Zup1g zZWo7b$F>MxdZ3%B8yr~1G22HI5H>E>P~;ne2L`Oxb^uGY?iN`#E3o-Ci zjum!3diu}#87do;p^Pv*j!NZVkIudCsq*Je9uWXBB~zHo;2>Q?6WijOO^V4)-6Eh2 zPxGK!TJnfm(*y#BQHfY&6c~Vd=%uXe_`dsOCWmtAQG~jIuVp^Q?GVRf^LJ5#jc@=z z(Fo#ss-tkk9RMW|RWw5ns+ZzMD?!jHSP(4v1@VWww`v5m;vy6@(UJJbZKadwiebHM zQ*?_-g4+(F$k>sDnafrg!cV8qVI~CekwXg4vz?;HY^AX*?D)uHj|JeKMjEbA!(WD? zjP+WP5~ttGBR(lP(^v^X0<5$uz~YIqB~*~|qxW_R6DneC41lz$2Pc5dgN7ec76wOp zlWB~}?{k2=INvn@`to~P-Z@E`-FYFzz1(e45k9Q@w4h&tf+&=(*6P~yqT#I^~o&G(a(U(QZWiMkZTp; zO9cQ{sC>B9a;G;_boCHCv#e?z&KpxZCuW8^ga>U7_NkFZ^HliX13heyTZAkq=sneEN^j33hea+&il~U zQ)*MDEyFwV36O%u>z9+}2Qh9ftCp3M$)}FNW|@pYIEqAmodSjet-sPtS_Oa|pGh~f zl)qP5TN|*MF(8)9?v$r|;L)Gw4wDx#O ztN_=2R#!`B6_F-8j+E^s}&&cOk<4)`Ne88qn2gkenCw| z#Q?3lx&&kvNdspDDI=-I1EB?vk~~*hZm8L<&}V>zs-hCT2{sA zc8`nydET(e+b<7I+!`tIIm;vPDKW3_Cae;!E48_qp zN4KZOI2TXGFixgc3x^gsKv%S78pC3Z2)2Gvl8Mcy<3J=Sq7Mf-6dz0i3C7ucr;2(V zLp|38mbH=~`|n`Ar*~Rwjm__QU0$it3D;V73@=7yV~E{*Ye-!F!TuQilA#0ZnOn>W!p`)sFQ>xw@GeE39x zxPd-);ow1|Ui_p}MDW6RbA@0gBJ}Cr2l}&{1kWgXagZ@#TqUI*4T5+~#R>3v5S6!udS8M46_iT$c zMVQpgsscbH-aViT9dE)tWAP5l9FI!px^JyY+}ZB8%_6VnI~dz2CRje-U_vs4?O}1< zF(w+Ylw_~{)Q7>37a{I77PJ6!4GWoW0?ysQl0jrALINg4*c!<_$MFED@1K$8?89PV z7ZXt#PVoRxau-9;b-{A2K{m7#!tiVcaQ(g`p+z!sACVt8eZ~Wp{nq1jq7IPKPOn&a z{;&q&Qm_c8Kh5ps3=oXX92-83)tGuQqe$#;#kP*?MiC7Lx(@fxrAKg^7yA z2PlF9<*pYI0Z}=2X)DFOHYCVW!)s3NNF9n}ZVw$Iu_Kw^q+mJ&dtGCngr9W(&U-r~ z0S5m)u_7=gst!`j>CJ?T>ChY`%gBcwU&tBcC->RJt~%Uyzb7!LR_7(VMP~IbG&20@ zoe#)=w*^Y(-`{DU{O*MBzFHR3KCrEO(5wsR%~)MtHBk}ns{k1~%taPZcLw6OE12pr zuGv#lgRKTLH}b;TRD#zW zy>PNFr0eD#?Wl`&G8ul#|B)5B{D3y1I;%77e9Km1mG>Ks)5}YA4W0f+v-#MI%MisFqxweY}!vk{cmgs 
zvB1QEM(OXxedU3d5Gh7wV5rM$E{Tqs>5aU8<0--s$_jL^*XT%J`;fjv4uNkVFf{O%x1#O|~&$EV*apBA_Nc3S|Cwn1(coB|qm(?({u#&R+J* z%r6?mfk5|JS!Qx?hQRvQQfR;;PH+#x=3va2&@99EG45j+_+_hse2} z-w9!t2#e^;<^31vy9`G(j_+5JLX1k$4hzaw;J9&~wa`|P@B>!XN%7=R35a}FWUohE3ggMRy_pMrhnSxqB3 z@3SC~K>@LMg@yW0 zSa(<;xSu~j57n6ugsvN=-Q-jiu~38$cItmrkKoNT1I6uIEhSXG&)LCF@w=*^oGwxw ze)StFuMnjN4?tl6i*#)r1MkTGjm;`!44ny;lojk45n)`aG%tmdyFZ&*p)8TafPQ{S8^#KZB zL2r6pSET+#ru1X+k-twUU*hiA)Ag$3dYRFF<7 zRNnXzlcz~=Xqh1BXT9!O@^;Vok{qVSpdfe4Ca8H5)tBsBVEUtp##%tA9AM)ne-S>h z=&eL&9n@SBevKVp4te-CFeqW?pVMn(Aoh*bsB0kzLY4(ldd-#fq?h(@g|Z>J{XJm1 z$NhM}85Oaqn@xfc_&3L1v8Cr6Bi%kr%v00En^V%cWfn^RizYbN(iSV`@w z#|s7HhwvaG1S3HhZA0y(u2;8V?0xcuFzKi(0H~=^U8X<>)nB|F9+LR0P&%9Nco8my z5GA571ebdcJa-Ze*<8Q3wk1QEc3fKT6j|}O#GC*nNVLARLFfHVXdB}0w!g`3#5Wii z9alv~e!N!jTUmVT17K7$(aX*8P2Jkm1|AU4qxPvmKt>7J**Lvj+mV!SLz-T0@vHwGLq!{C!NhT|=%xbCns3c_ zo^S~VseciOZ)6l2y?;GzN#6rQ{(4cHh=NrxEn>wbp*0l038IU=bK?|u%CJFP(-Tu0my9JNZS!?`rSgo#q)j|g?LxggVj~&p z48V8TA&^R#9!P+D^aA)4kh0{d4^lb;OQEdrg8C)KKuw{|TH}Z10Oz*-0#im@>i;K1 z$-TE><#KclcVpTWha%%a%3eT0e3BIvbQ!nat_^}4urVkHU2A^VDy)7@8^?HzU)`gm-t@qun$)IV^hapn+5!Q)GptD2L8@R zxL|(AG(r^gi5-&H^q(xzgh6vLN^&o?5p$n=d2op$KxM#y9&@~IznbaDPC=Jffi1Jt ztcEW2hCcYNn&XMr)<2)m^0C8>y9MK|DT)llTd2X9_!FE!g;xT`TPdpYrL!PlAG}Ji z&i`;1ViRUn?vJa>Q%xWs2Li*IYR(Y&>D7*Clle7-=|=-9lG&9bfe@l?bGO$v(1$^g?-wn1`NPg5(BqGlZgSos7dSi%7M-}1UyMa z3-`lyRZ$LIaNn(+dUJ90PFsJ7G;fuCE!qN&S#UXnZnA(Wf0zxs^-CjSefFvA{D`JT zMjn!$Bn3%uaJKT`jdlM-gLBFMRA$$7ALIl?#N%T1J{p*27LFyclwokz*m!AT01DA=bCxig9IlRgu{ea|V3hEM z9Yf3C;=gWTUd693yWv(a4ilUixEl4?+ztlWfcoz?S7S3kd_n|-2nN4x!kRJ_23kD-p zj&x<}a$zxOT>c%6p>ZW(JL-o*ULpv5-L+{C!Nc`rinzv`luQ!|_DWhU=c6#fKQ(G? 
z8DDIHnm+74X1ady5}6mq*-D1wM3+{~U(8HT;XyhtEr3WSX@p3zhSqV=Tz;z7Vj~a& zlinoXsBPl7KGvf3Jb6)?Xt#FBeRgc`!3yYdcYt1lIsSq9Ov!^rpgG`SUQj^CeZ967 z^^wFc1)ieAEH#psr)gd>5a5b>PQp9%1$$E#TO2$d^$2pv+#>L9k7sMNE?jQ$-ThfG zkm8Ha36JwP$2k;!{P_+>x1a-2sh!S(Yn3wq!4UB_%`Bk9pDGu1LsQp-c}#L~3B|$Z z3u+A~axcS;JaZwaMb`PRp+!?Lqe@i-4lkezK`)R;qU~qlg<8ZZ@fOH@d-Uj%q`7rq z{wv1tS_@0GZv`Gpnd>wD2v+YpCyJRdawn};!>0f1n;5(rJFy#vqq+|y_hO@yymX6m zwpa7cHYz&d_+QWdW-R`G)+nyU2 z=+G3$LDDN&)(+_JmHoQ`8Z?%=9bfo^#h5%`8`2y)UOz4JU+k%seVGHEsHoP`il+ic zOZmkF%>`2l%4?7>I%hW7*MdGBrm#rY=D|8zjyrR=@ztxk4Oql3QTB#IR$-Cim3%`_ zTPyGe`b5K&NRQI7XnE66JvLSPI zLNc9dx-G)WUi9-?VaS=_R4lL<3&aVlP;C&bY}G#BvW;_G4=QELzHvrW)aCVoAB)Gp zKA*IT5Fv!KaRAlViMPWKw4F+|Kg2Kd&N3tP0Zb=J8sdxhw5cO@{9G?$Ms{Bm;GU>+ zWChHNXOZ2$!3t^SIzfzlX*3&}Ae&)7O|uODn{4t~Z~4$sivkrgmY5?lG7%fUfBTLK zW{FsSD;#J)DpOt^4zNuwf>p+GJ@K$u6DVuqsp<1yK zGN$X@3Ayg=RRNJF540QK_2pLUfH~0PP*`g@3K^P^e;K41wWDFcAwsoq_O~=wBJTtV zw5qL?$_BUy$F6Gcl9iM-T(m^-(b9cx2mzrHs`zSH8H&kQcq^ueW);SI!R*6Hh2L^Y zNC;E}EsJ&Z9bkc-3imhsL66f0aUGK&bcB)}M6*b zpfbX@k>J>+#cf;4;mAQuhD-NB?V)9dZJdH5Y*bZ}m$e_F8Mke!SN#MN!@xf5dru8Z z!}Di7&N5t%dK>S^+W5(E|Pbq@{ngG$WaMYyQ;X)ahY zPrf!IQ3?#e^N;#!AtDBL(*JhHiwFrHK{w6uSPNzk&W-)&(r6&I8LNOSS-vyjp%J87 z<>+?0yC%GZk4FmKq5T-dA1$z}XUQh4eH(IcZce=@A7|{pgtM3B=z&db2f+>_$@(I25i# zD3&^vGi08agT66EJ?pduoMzv&ZGin(vq3Gdbc|94Xr4vNVufRBm|HOMmBoAAqRmuo&eMkF#_N@8FGN< z3>DNk;dxp+2wwozUrL6EZRdxIW|!n3&jh&xW>kSIb}T#^Y=WabXk-Hi$U2|WZ6r9f z`?cBqxzjM}Eg0zOm0T8^+IfH1xeS%N-6D8{4ex=3@KoCz+TGiY8S6*ter`-qD)th- zlYp{0JNd)tynO=*-F28wgxx6s187kD=4eiF$W`ip_3nVc-FjZY3c}kW>@(#_+jyFb zdfTsT*{%U=XH?5hGc4rXw;8kP(b1t?7~e4Xs&AiL$OFZ#`%MXT?rvu^aFN1))!ukR zrHGbqf9`eh+gVfsru6}!%BCP(U`6DbIj6W>9=#tUXASOKKM=HZ8RM`}0SRnQ8=yQg z2vqexXrBpe?F~sATZNer$D}A$<%fy48pYkGomC1#8su^(Q!#0(=^LLmNTq0UZUKrt zNg6x(m=Z{8jZ58gr+@rCOEa;UrELG3L~QIV+q78|IN;80-Ui4|Jtu?fkXew>%qrf) zqHm}#-qJe2=7tCpMh!#|!rKVX&o8)wsQLOmfPSj_n^~Q|sOCjM}>?|bX~oYWf(g`vR>jg`g`!2A#FHQw3;D2kLl57dsKc9PO#UUb6e!jDrI*LhH)H2vL*joiK>{7ElEAHSb8ej9`gjml^?M&!ufo`S#;jv 
z-sX`YO_GbInqHSZ?xh&p%0Uf?(kRAf4kWPHeP1AnFCZ?XAT~yr>}x^(a3fpzLX>}4 zWR~&E180MbAt0Z)p8`waUu5 z{)!1vjYoz-OtGGPl^x3WsRT%)DN^r|u~LFT8Kqm4u1VJ0G1kTxAjI#3H5Y3TNro$^ zO?YHlBnUQjO?F-;LN~fD;$g$zeNF~B>)26@O%j7<1mztRa4|$w&`TxxrbK{xOlkin zhq8Aa?BkKl0SovP=`i{iXM~_soq%y|?x6n}0NNv_-FDu4(1SODoP!2Ji|eFbH((4r zdZ&vY=r}@w(enB0D)CSwrdV3-tDNUZZmz*@+_hpsgod^t`?h4m>Cr;9$(x6ekEg8% z<0|fn)OTD%OIXyNH1Z_7dAxPLmLsjvp%4Y07X@fA#IP5-z!l=*CwZH?rE5-o)twet zx8nnWIgR-JW&ZB|&!rguaU6ci?@pwwZV+xq!)Pl1AZrM^t{%&TEQbQl|T)aOf zg)X(DoOjsd-?V1+rRfNGoAS1PG&8V~A^E8s&hlnuJ~T#`T2>TR^A&!#D-{r~Nh<>E zwuO}*(Hhv7R)UD?B<~(=u5xsEPmrQ2MxDRSILd??eGzxRvJg0Ip_3{;c#|@4&?_dC zI+j#X&kb7Jgcf9rLP~F~fsucmi507AjmB^)nUoDyBa|7)z>HFpyRuT{Rpu_qxE5Zc zx5JLFrlPUh#Ott9cV&`H;znw6T(;h!1by@Ondv=CPDqk1a~NhTQUg+86{z(Mu{Ah} zG|g7SUz?-MSVl20Zp~Lonev{hfyxKdMjnI!X89Koav;6%X`J-r0{v+1jIINu7YQJ$BnZ*MMOll`lvI!-9XQ?9$BWe&3gINv z2UYlPQj{puXmVKxt(#tV5ozk94mVx}u@DG4FzeS|oI~>*ORh=~PSGLDWnx025MsqJ zj79uI{YQ)Ke*F!+ln(6=H=QwB@cUTl!&7*6yYG0Ds!WWUQu^go+RQgpEan1HCq!#l zs3#inZ-)sntwa> zC1SIq79*yOpU3nSAm)w^lhq+hz@EE8lG1FPi{1+5wo<3DjMM2!Gnj|)sw;sXa<{kw zVbLY84_KH890g&h`qLm^?amhq6LBA;HTPoS)}05#toQwK_b$ch&@09b3X zols-mE2`H}zjiBw_|en81Jc^sbp$;jxYWEi0nyu+VD{vQdga8#ARX~<7PJHzwz7pQ znP>n+>kK*ZVw^r$uzxs-39wEAuL*Kco2pY0G7{O)B{_gV;~X31BkB*$1D+hCoMZ3pyK#X~>scLdFTK#QTnT zzAW;fqV$!Jh@4roJ+v@Tpf(8lY8qi(p)m7V-AJ9QUcJamZhEU3oBE_d|A7M}E4dq4 zHE%(J{z~t9|RVC%~ z!#ulRZyuA6B+{QrolL8aT8GP?S6I7U385*cw1Z44kX*3Q_X?Uyr(OR1S8OhBu_T5q0Bk&N}B5 zL!a2QlEmhgGo4Eu4pTzFK?dPE!?h3A$;`M|$u4N?r-#2lt>DfDC~$-t3aWmA_9*;A zhR?-)K%logUTOqymDk@`5cW_6DWw%{>HML3N9&3|uJ!|Dzt%fQVm&DRSziy0%+t73 zQY24NB;yVft&hCxlrswBB#pR)+$=8N2IrnDEbM~WCpx0r4iGkQNi)@^niuc)>JC@| zYMg9cO~mo0Wkl6|jGz`sKJyTOga2;Yzi5z)?5-XEVDSe)+7@iaOCw? 
zzrL$oKV=E49gK32aN>Fo54R|C`wetEeh6%OTh}ZUTh!Z%7IhcDK7l`kVb#2M2nyD60 zRPWH5_@76|W3(w~{@h9+cnR2;N-Kx~jHHelr8}j2fL$DlfLb;mjlj`W!(#g(A8>Sh zy5(ZcF(?H9YSazx)Xu}Ni_p0!1y@bE{32)a`i)tzly=HRA}P%E2Ptl*X4*=Uy(Zg( zRHlT)Zz}6&RgCH<)5b?ikNmq=O)CUkwvFl=N_ivZ#LV9Q;aUTR;*ZcDLI3^ zG?W95Mk6iHKsyqClE zfW`gQoOdx*4R{LxRw>tB48f*3$hTg!@qDY1CNYK{ zNQXPtu#3+SDraBjuveFORxE&%S_=}mM z3cZ|&4+sQaI3%Oo_~wchk`HZ)OL9@mCNGf%_LzW)Ml+DNk>pOqf;#|F8TY&8ypkh{ zelG)S`F?e5VFyliosu00m%E8GK;D=okU9jeBn`-x+Msfsit0(SHBMq_30ZY25$-MW zSr&$yr62mX2dF$|MJ3EgGoXO|K8iYdsJi4RDu2EHaP3H5B4Zf_O9Kdvy>i_T==Wr> z!}Iy8A$Bq}3DN*2ZG;lMtP_`opCiDOqwL*jQA0E9-wCYlPyZntInn@J8p)DOZ@m`+ z<50V)7~J0yf^u5@Y$zy^pluzK$2T(WZu=j;EJi7u;7=qc9qqOn%KyW1%(p%@W}|Lk zb96P5jIyQNnzHypqO1-E0(`VJ5(lE5c2(Y=-PF>bs8yP;!nrW8%pV)E8!w?MOaIoW zU&wB%I1medHD7=F9x2-%#{KAG&Uo9LNjf)lVUsBM^Y(<2DhC5OTu_qIV<0`&iqlKV z4d&~Z$9#d88t3NtFUPYdntV#7gVM^53Q|VPtgeh1fQ5Yk&=6WsROhQj>m3x%B|g8` zFi-5IkY)GkmF2)2$#xYS1j0eJEnt=KqD7+uuh+)efih?AY zcKm4;66=N&oXtU@w@_j{S#j&Qg7!4S@yMo|k$tH!0d0oKphd?J}Byo7POBszm~ZPXdR}A)aAnGs*y<_NNrfv;0{US|q5N$(Sc4C}A)wZcuz9 zO2for7vNVPF9xN!L2t?5`cE0m9jufd)*Qw<{5yJWaW|};cQC^s*wB!c?KB+hLDg)_ zK4ggK*o$>@PNdp#HZ3V99-9P6PPYN(9@u24=Y3}1lvu(}j5x9cFhJXQhBQhy{kCu~ zy#^-41X~R+09QdJ>)jNv=#oeCk;sP{IswANkE4ukBPaeC=I#(UsEk zee4L!4WOR17r(m6Rtgc~BI2YhwNKA5b}shsuzXq`pL;%C0S-a1E+uI$5~d|ABEUUGwbp;!N~4VQ74pNGwU3hA6iq6&T-? 
z3LGBAnK=h5nRzm$ltHQ^rmvWY7WscW;ZhEwlc$pNHL47r)aawx)kY;LL6;p45r(*Y zyIB6IvfFjO)(`6OM#!mDev@<(VG*r6Ds=e74^s;(RpeH)`&AaWb!5Amz-5si!=`b< z&>{fkzkMd9IpxV@%z3-0Q}oUpuZ97MN2;nFahCxUr0YDX0Xvm8#{t*TYT0O~lK zHa;a7Q3PgGNGNWR+3Ohup#gyMfyPb}#({t_L@^?qz!dob<-*Ij?75R^l@3QQqNG(r zj_9BvIg#U3IV9AX$|))#_k#Zropm#_eHkw*Enr$6(FLiApOK`WFq1_2Y#=~)m6z~6 zM7MKcuXU@zMSqdy54bQVz;2NeTLGoW5+r*LEj=rtqbL!B8eU9TGEc2h4#<=|?DfjO zO#O@qD>$^D0Bz3kYqe6ebfA3+l6K1AG1xI*M2N=x=7KoX`YC1jT*$+6Coh(p=2?oN zm#Zj^L|K8P;XNow7JWq+DH|X%w-rOJkGHkERVa|o2fStw>(^HW6~(9lx!cL9{x?Hd zuvAm^RdYOyL6+5C|`*F=JA}@3^Ofk;qQb+%5FG;v~{5Yq=@J$z}E0jhILR6 zNNcI@E1R4YM+gcAz>G1+vXzV_SwZloRz~=teEwvP^G(WmZH8leUrf`QWRAfzPychc zCe&+7ZWAIi+tpnATzFO^P2pG6Ia^ZaaKqVetSVQsvxM_-;)=!-g0GXQJ_vVJl_0p> zA`os1CCas76Am3<5V&iX@Ajttb!grEi2=f%nfD9O@;*d5U+=|C6W0isiE3>OI{ncl zfTT}WpavJV&<)YFb%jaVzyu(04ZE^N)$tkvu# zd>}S|{lo?9+SPBK7EFJj$5BIEKXEK7$Guo0&Eb{pr z>q5*WCWOV)fRa^mo1`KcVMu9lga`_Bz3+^PaVoN}oZ1%3s~VJ0aSId!P;RIeA2EHe z?loaJ?{Im650wbT%44d1{(Ytn1C(_+f6xy&1%nGKb(c!9#G>?c7m0WUHT zo-dGj?5w*4OwzM*q_UTQyBx!erh3E)8_KT)z8sECjQ75wQd*psq4@T%HE2iGmr!)< z)^vZ`GNY7TGj^*+G`Ue}19^lH<1u^q^m1&ullMZ}*NDa()(R_@*fS(S*+m7I5%1#VPw= zYOciJu4yih+l<3Utt8K_VZf?8{B{p3g!9vj5`A7;ItlJSVyO&(d6zGPVf6yfoDuvVfUj)7{XB6CNb6kkRnfH97 z++Mu0UR}ePL4)}I3_|YdelVcK4asM_RI0@Rh_9X~rla|Jr3S}vJQC83bi5SiLor}K zAwv0WS!A&Jncj_M-INF>nBW9fwzoh4dnpYC^t(NGy3w>S2UjIft25!jAY`WZs3|BU z1jVs&EumTk1R2XQ0qPSpxFiHSQZWD8)!Ed}yo(49Z8c$BYt%hd_S8(GV`D&86GAm0 z0brUF!69E*%qG?}hG9CVX3Z8MM*_*CNA6AxNK?h75TL9QgpJG~HTvZJjbk~~Y8jMD zMn%c6Nm|Cg0iA7s^s*X|@8frH2;>Z0ylt|Z83{}~FBLp=;S#DgbWhc{rs7xYd*NAo zj=<_kh|Yxhh&k&Nf+_CWZNXOx_WfcP_|qUd_q74U+Ss_k#9^h4`2U`vhlw&^dE`UL zP%syy3UQ)&G zwz|U)BQtucl=5T@G50i*CXR?JE+m0iW4A(QvA1HD)m5ufwNA5>MzWuxM-?SUSo$W;lo!s{M(67u|NBdJ2B5g2OBevecY@Oymv`kd$Pg-IKstn`a?yCqzb zkAjsxah?zqZD})NaEpjp+8~GWZ0Kr?l23*>4;y16UnV?dxUgps$*?}&Q`p?RD`J8x(^T{Z^{3TL2e8~Qo9f5JTJ4>6<|vUh>4WkqLtR^g5_tk=~OVLo0;=M9oCJi{|IkHjC4059s#=(v7985qVP^qzh z3U@VVNCl5rFso{d^zpd21e}{YSxq%p$TGVm=na|T6k}r5tqTqI$M0VY&Ju=a3fN?; 
z4uyd&mJuNmY^~(YqJuQp7r-EJ-_^c&MuEohCMphyPDrnxXe*>Z!Oo^t2_|=vMDUR7 zKGD@BbOPa~)>hkGkmBXCqBkT?R)SgaVpD^y-TELQ^#0*>F~z~6 z2xjHC1GaggY){NVwg9$|x#;I-K+BOm%I^~R`=|oG%sGubWUIqGUrsahG>)>G7+tAk?qNu8?-#>L?h}~SY)a@lK!_4^2mP~}yM+e8p%RO_L=NEqML@d0rxxW^P>TSA z@2*}e@FxspuVvw$t{Riv$ev@9ES zKxzc|M$o|;0G|H(!p$;OG3`L~h@#3iT*WoaTh+c2oEq)Y&EusVAN4#j)Xsziu< z7XUt%5q@pLi>n$M^!|J51X@06iYv6EifbK}TkH>s(Dx6h?KNH0`i8);jJuM2GgIa> z+3^^Nc);9uWN2Vs#Mq{1+GM}6w!Vi)LwP~e$CBe*ljl_SA4)*m=`b7XUUy|S@=G{c z-jYIG|0L!SV}W$~+gjO%h%mCGf)?YVE{EOk%DWhsRO|5(<~GtM4`rPlG0uTFA2w(Vl`N@IRs4e|r{cY#y%k z7#Kf_lW4Ld9+*+1U~a)(GD=OvkrH?vUvKoA#cS%5tpW_h9SjDX=a2-oO=0x`l^X;> z2-r}(Ek7vyUr|*(mje0pskG@Qn;L+DjI`~P9k>dogiRqBOeQK^1-+6(A}Rtx*-JL* zCwdkteKis5YftD|q!LqF1aaFr;Ot4kTUf?_C5<$Oep*7Zge{DG> zkSi-NB#=*g5kxoC!tKLtaunjMv!<7Qi48kDAhe^i5u|W#1ByIewU5^!)UVO%9GQ$TUb0+srIW8mmT7c0? z8{{(-D4@7%rA7(4037Q6*01$WDPVQXe3l+bzx3wU-rM zyaa(6nk_>8q@yJ4(?Ts8_8+nv5@%69U}rWnwOHF>8)ga6ioPaE)f!UUw- zX}ZuQBLxkDEgV)Vl(&R7&G8AF^QX`XcUW?}C<*a|o_>0TBni~OWJbBYIQ{24Krjj;#=O#x&;n zgd6&d6o7-7EGF>votIH2ih|VGq|`o2r^p=@1A%=K9X%O8b)EkxFP%OHtvP_^&`gNU z;iesJTYmEQq~?Gv%8OScs=?GlB^}vmUoo%6ih_ug-y>SI#Hf9!TD#GAWX2#9COr$P z5>S;#^!oX$h%pZw2|rAMmE;-tJB*{WHOo{E5#1nQd0i^QLuo3P8gD(=DV39J3ko~x z2Z0W5qG&m>aE~`@n9my;h!z?jPIAAvw8*B1?~g1P+-zS-j~k4_xp_<+Ns+8WZNSe?(6gkD+S;6^gmVImW;+!yJ((nEN7Z4+W8C zVxhJRlm$WuuSpi>6I%CCara|c65mMjr_ZsFl4?!FCv?}f&3H~=gI1peW~hpzBc*eW z9LS4Io^4n}SB0^9aSYIJcez0VBAG*@%Aq$63bx4}V+8j)$on=K@%I`d6&*h9EWun8Ng5(E3Wc(0ygq;v|pZ=2ha621>4)sjyr`bq$eITF<_bsSLMKK-hCAOz? 
z?P5-iB8gxS(-)jhi$OlGXLN}x z`7kkOF>H!gV#}j9eKDgrppVXN$rx4VGGuD%xs*H5MKY%lj>b#4MRur7nE@(2BnGJ3 zH?@*ODZNgJH%ljEfAnsJ{VV2L8UMZOy|~S`Qn~OotaihwE%r_Od?)~N?tAP-S4URe zU26#pL!aV{hI%t?s_aHIFf!$(+fAKUR38+MjC{htlNrWldAb}v(W~EED5W_WWm#48 z?x>~mTCDj{+_ekv>M;E9yV3|mEcPbZfgdI2qU%|2aX$)le?LcW?~K9W$^PINn3L5~ z@ywZkZa9R;S96kE3X2u44IoekVYAFyWIdhfuE)ee&x7hc5qC4!A~J@OYAsc``Ls$Y zVCo5l$-l&o;GU-5@Tz(|tU=60VNABKsfc7?5J=3GvmG zYRJeHj^jqber)+KCA$)lwCxzGP!O%lsY}1~#|d^RD{F@taUkN?QU+e2EGS1ml?tu8 zO)Ikwp(q58?&mgwgn^RA20fU2-%{F~bxdiA;a;v(f_MfuO8WShQTL;bA9)$v7 zNy#h~yy2vOFi|v^&E!f#55;JPPGueWHL3_mk%C6T0%L+=ovtx74z)jNb>y@!xO{O2 zbEFA?3)cOUVJoeGHt$ga=5l_`v#;7%G?xPV-HO2ds8@1T;x zeCTLt&L%rlU{8soGf12o9PD&b{&FRA`pC_33q*_`png0*L+S)awO=vJI4IT`9w3m8{CfP3@Z3ai}!|6g-5wJPK6U%*1o1we1U;sR5% zi@t~ud?twk7^94q6PLH_%w}F}E(WVLC)jlmZH=BuL&p&#QH_$afRCnu9tJ_h$!Sci zZ}<{AzX+^49cyhk7-oB^Ig`Tw+#7fCqq*o=)ddO+;rw*0Rr{AtKo0L!hC4nQ>XPcf zTf$hPwXfq)V1uT%AZ@sqt_+PbMkQuu2GRvf(fLuuJAEKVB#!uO0dLmv5Mp3>;KNkXzjQA{}s!n*q|I zP707o#pvw53vN_rxeuul1cz8YhK)qsej~bD7Ms_$aLim)VA@PV9>Vw(ZJKI_1f#9H z5tC_T?ZG$+qoQmRV7p-rGjWd1MHuy*VD`SdW#~1U@J3I>R2D&`d$~NWfPq)AL6M}4 zJ5XNY`0{^ixa$xAZK)mz`{GyjrGR&4o*)3|0_k=Kw9|_i2>>+lVv)=c(n%{iO;SAG zlG|!?Y*eocT7BU=g55;)N4XE6o4UW5h&>k&IOcApfZQ_nj7a=M6M1SGfhx|C+m@6) zLhvb@Xw#DvHk%WvXQn zk-$*Xe$to}gtq)o5&gN044t7tX;6T3`Sjls*ro?pj9QWZTf8z0$Ny?a06&%qDG@!6 z{_sf(QpMm|K}C|l892u{LGRuivdTNd&MAq~I3XGWh$DP!BnG?i!K<4V*6J%EI(L~W zcl&)S=*`m!kRDCMkc_1J##y7krW?BKyZt~;utrS6Cw9}K3oe+0BJJ+YW5Z2LMwj?U z_YGmOf){qH{y82=8i@Y(Y}*dQ)549b5@j4^&T)?a82+W;;pd-$uD=b&iL^;HoG`~h zR;Pr;jz6sqW3OwnfBG8aWntkZk7LM(!YKE}L|NabRhwHC+&6^*&J^~1#v%HkYCx`E zS!jj8h7RL7c}@T_a7iI<5q4c-LMld(2C(_^1`7lply3;%BaG(C6ZlTL#QnI9Aatxds0 zt@E`>20QVvb+HS zx4K&e*+DSy8cTH?PKQ3V5${sX*dJfjP$u_}cbC(;f&O&Fcv<&-L4epLE8_=k7C8b( zlllApyv$^d0a6tjnjs9XWaF`;{+o{>tvbvS{uvE*nflG@;iyML(umC#=HQxJ#_CA# zkSD!AU>|A)@NKUXihfN!E@n3-e&m7X5)`%%T?H3Gs$^5yxpT? 
zn*KS0h9jbOWqn#MBz^)t%aY1Xo_SN?yDH$TUj*L>1iIdff%Y#*B;NEU7ZeqOQTv9R zaeP1WQ4aStZIWp)%tA1+*;x7N& z8<#msDQ)?o?sbi|(1m0$YZoK3weonmB1ZH&&nBkGiCiWpi&&5HH{)y)^-yWhry?9& zDzJ1#y@zlmOHe97b50GEA0lhvIe*yyF3F=F<1AIjSGZ?!dx0r)thdywf3N!(+I(AC zMH00_$)sBgi^kl6#{vpiD0m&_kf%- zK(ZB~j{df!OowqanE;Q2YA6&;S-=H21VUOwEQvj)PuezT*h3WMIiT~Wbk9}BrX5&z zUNb*D(C8-?kvC}`oWmz)9X9kv;Au~t%HkYVB^c~!xclKr-_uQ*8RS%3p)B$#-(*P# zU)fM!uI>MgpdU7{I^sqib>52h7Ff60#Zc>lgk$NY&`qCA-?uYQ3FaxvjH0}raX;Mz zS8M@9o=W%mE_RVY%fw*VG-LcvD?WYu+JT}&(E*PQ{~wH-W2lTOV8H7SG?#bejN zK{hnrgn`rA;8&XXgX;lOCx|FE0qb1VfCRe-WOjb*-i--*Yu8d*-8_d`ipMRJvaIKc znk_p;OKWZ<2KUI;f;%d+j#wgJ#Y>V$=9Tr{g_7;MLY~>rdiz44#-`Qz_)2`iP~MYv zSuIim_2A)id-CScElJz0KXDuelahQ@JDjOSDixzJ=_zM{u9ww;H8n)NTj}A`Nd3`? z24cVf8T1jw{Yx(;$x?3o44pY{euKwg$SlG+iXE!@B zhPtjMV0Yz(3sPy$$LN>*KbjS|ZT@y?G2*I}@s5x^*PW5=Swi%Pz7gD8old|%} zOiW&+#!m0_DIVXp=--;=v&!TTJjUVZQgIHe$JC6(i5J!=*(HB!xG!vxh7Rng_Zg3e zO$L4c$xz=?6!~<$S<%r$dKXPFB{Qx( z2`UBUmqv?|NdmDP(o*!SM`@_tG!&|QLN5k|SO<~S{GbJ2kPzP!XO5GKI{9>Sh~AlF zEqILub%s5v?$TK&hrluzod5#feM?UaVG{G1Uu^n$D(g#fO7{JRbBi&{Y_YLgOlBD^ zk1f;zj*gffRbN`{CTEOiye*tah@C{N1N=pb~;)%p80ct~-AqGNsAj7POUTosKZPOY1i#Rw;=&@|g z6t-+mxn!U}EtN%i9N6~t|Swv!l!%phDsl_QWo;OH+XV%=P);C=~ zlE&j_06-bS(Pf<(qV!reKFp?|eBc=Uo93z(^S|Q-Z0>a*L!%ULWlNws6oHU^==!B8 zISGoVP8p6x7Ec+3-qVM)9KLPXiYz94bWM|k-ky*opo~vra*R1fV;WQ+nHOetpJ5XJ zQW1FuBKkLtHAHHx97V?7g@(=9jheug9aiEoN^+Op({+g#wrOxf-GdmU@Thxp{g31oV1PP|N%CYiY zA$WyQuo$;To5nkp4+-{ynIt+UgcpzJo&{{hNKGa|9B7MRdpynT1^(!oh$o_Brx-@Z zFIAPMHiB}}pRBosWFSJ5V-1g=A-W|iP%gVI1S6NHZ-(|(gglLiG~6U2MCWN{Zpkg$ z`Hr6F*16&j5)L=k^6Jz+SUbg(soBY}d$UEkkX+&s9EwNBRCzqHGnc9>;ya@|tkAHH zUxm5+jdWd%%%t$G#XowtB#F66k0Z<4LpZcM$&1trtf=IO59xgsVGx+@Gs;f~NWIYb zNIHPB=LR8+RgJe=x=;Hg@Va3ul$|PXusq8yPWXvAq@US5<=^hTI()qB?wi*U6`5rk zM+46fZdn@j*{D{6O!V$KC>fNzy0Q<1%)^VEv^}fi^Hn2%aaL*d*$Npf99Wrh44=)M zo6r?Vmkqsk38>m$4@|m{t}SHxKQxI>YaHK1Q`8B(*zg(s8PsxID_2A`m@;PdW-_X> zX1wb9n&H_e*uZ)N4TI#aI69L(njG|xYoJ_}1i=$VsP7z>EE}d8O{tcIE^yqTpiuXe 
zCA4_MJHE0~)6S)dL^OCUrh|Gu?Yd23mmu~d4P_xPwpP7PZPn7Av33kKka1lS>7*Ju zV%^~1Ip*%Kl9FaU^!Ev;KE^^I)p>HZF^32y8wKmvdAM4OrY=yp6WLsfoGvZwAgn%(%7K(9f&=2q+c&oF`nBdDK{rTn&0Gadv z=SpzMVF^4_)59njq6w}9U4)p^HoEi2zeJ+r+fu^{&StKDfipo>SpFUM5)z5{o3d>L z(Xzk$6N{+Be2D)zL1ef)M>9Sp=~y#e^0u8Z2Jo4biBD`EP|x+%DepnY0rvqKbmz$o zh_yJNZSO#;bC~;9;D2MyI?d6El%`0KxyJMIdZy@Ztgv{%76dEFE;=QFjNig%wrw(B zv+PCAJAdzLG7c=D*KR8XXhYD$)3PNCj4lEc2o=P6=s>;4AXeDRAyk{-5_WfLD^zx0 zcml{3g20wSMyh+GRntK*;HOz3xQHYZV3#oDMdV@TKwM-oUYl@^(4Vp_A;l0Oa6Glb zoJV!(2tv;Kxu_Ro=z^$jIhHUxX_i37@q)kO6Q>lq#`nvgU`W?;D^i(Mh1r#XTF=`p zXtTIJt|Y^iCU`bd>@}^Uv&eHsQ8dQasHGr<#K{ZrN;7|#V4dlWzvK!G9q68MLhwI3 zzgmQ)LTJjo4hT5>dOioe`JBa4ECNK;x-n^+1+cobpOnu9pq9dTSDzk8AL7qPgXVhQ z;S^$2z+-Dlqq>I+k{E4Q;wti!P?hwLQ<4Zt_{z-)CqYj&jA^5<)Q5hV%Ikbb>blb% zK*?NoiA*jx6HZFpa4Qv@_9hREPl#i+PnOy~2gQ13mLVig#*yw#uGfO%?cM`b}YV0Et~HQSxfv zgO4o|gjXb*Bn;+JMpg1wUvBuaGv2VRXh`SXhhdU#O@WeF&vJUrS=C+yNS@oE_E2Z? z3_$hId-}!*v=%frTLa0ewG%cKmuW>3&Kz*AR{-nstNubmlA2BZNz^{=a9NZ-;=LpJ zu;$V*(9m;FY6_YLXfvb zC5+U=%{Dayd_SF`8&xs=g9U8|h3HHGc;_M*lV0A|1u~l{ZcVCVT@6w&P;BnfC-;eQ zkc@Qj^J65o^QI%>z3k)ZBHJkzVi62aR$h_kg>8t1<-7JdH{~k#)C8^JjThV*5zCb- zW_}EG65nDHWiQ3BAW(mSGBhpX&E@CICC6M(xAnY-Gzh199bsB!>^E$9^(`zPE5?Ku0FC9bw&!R1sx-Nq%w{zIb2^uX;V3wIDxv+Vc6Mk%X&s^v zqwP)Pz=_rec!)YhGPy~J((ND-oO%G>y=v`h53?3VAqxhS5uj6X5l|e9u)vSI#c-ZE zI`{qrNkV>Cql7#dY^a_G-IVEr80~A4Ln&9NdL`o{y%OA&((!HZ>UNxX29FXu_A*G8YBNTjjsU z$Kl2BpmE5iHCHxq2%y*WY-BKW>QOGF#0LX#)d zQjw)LwwQq|lD!DAdes=!&({jkw#>^@kP*JHOQ`>o!k9to2YguJFN{CWSM%W6qhLbh zdBFC;3?4jA{*UyOnb4-~z`@*&ObV0%K4kDG;_Gxb5`Zmp@GlD92mYyufh~6?4P8@1 zYhyfyV+Bfd5fnj~Ch)Yo0(MHTsjsl<$petcH!doIv_(p* z|G4n%+~y+ozIdckEtJd;kdDv6?Ig%-J@8H*#|49@AN=?yPUG^KG*v*ty5h5O?8s4I z6$EHi=#>OJPGM24-*W?FS5_WG^NPqy<(k)Rf?ZQi(%WQlxAdSfyw72OXk~_FTq)pl zmqb7=dXs~i?1R|Xw$P>{aoR*d*&=&2-T5^lzvFG+fnoWJsC-ExLCCBE<{~_14I+iz zmMW^8DzgoUP$5qmzkwIw8U(IJM;y%OEi%(`215f0T3-~A6%e;`{1^JN zleA-sNaP^lGqQ0e5()$&v^DL*-yWpmsAikBOFs<6!jDcUgeWU@7W)1L-~>Opo@_Af5t{{DJSNrSzP~ 
z^VLBS+k~#GcXsVEcF-9LEZL*4NCKYcUsF3oL{2ht7c?T$g5zjBh(SibVUteQ zTi5`deubb^FT8oepyFUi6m%x7x%ir7T>oGM+eqY%Yb`=Y{5IOJ*Ru?=vEi9W?##q! zfzXO*ua#>&LXH~9pI4Igz_LNC%K+95&RBP)@WSuu$6JLgvH)9jEeAL(lGT3 zU*e?OIKV}eODfskoDxDZimK*Kc>U%NCteq!#c2TiacKLk03c?bgN_S%WlH1(aZZ@F zHz%fzs@C4C3hRm{bwv7F)7^YCBQ-56T9)4_N}O$gsg}w2V9+v@0#!}fi=R|h` z^uW(HCf{1h5ml+$W^`twQ27B@bPkP^6or*@A%Vd2&vb3zqqA_l`iLc>pByVLz@VAT zHjyy3DV{gK$tF&LC$c&$?)8qfG;o%ei~*Ly{kN3;UBSk(;2Q|Id9k=J6&99VO2 znn}n&d&O)IhiR{ArD|-C7`V(G3+|eS1)o_duh?oY`UifRFW*Ww2~wvyZ_T#Ui`5BO zw}t~7qQiGW+8`NY_@Ap!I(WnV^r~R_!$LG-`_7BhGZ@Am-|$4@Ity)cP4#tn@GG=Vd=uW+!f$x zzh2^w^cbVHdbp)v-whKW4y`vH2OCcbw5LoB098l;mN#3+Qui{~RA_oTJ+ZV7D78A( z;Xsdp?v-Of3zsP2V?nZFhCC6lJP`mG3AtnP;6!}5(MVnz* z6SGV>fzED9GZ7Te2Foi20VM8JBSIN;b&=V|kr}G0?*1(6_qhv~J z+sPVXb-TpSnC)Z(1jxk2LkV=&u7ZlZ#-pI{jiF||^Q>1p3IRmCqIU4JZ%jE?#k2V4fBd1IUQ9J;`u46|_31 ztd%+@wX}Vw5i8-046X^SUxO;fy9iW=Qm^R_%UTCyQ#OVfn{c${{f}>?b1rvp- zGp_`rkmUHoY!-xE|eH~~H>4Sqbs335Z0TT$7aVx6YcatLQqlrRK9(xy=p4yiB4en>H%>}CtX zx@iI9J`_wU#~%S8M#N?kne*D8LbJ6#30f6OUdN1(%I((_N|mUFExzryRV5z;K+e5E zCd24DjKaK4Eu3HlG<$!67Jk$ZOh&um^~k6~0x1Om!S0=e5A@StA7d@HP)7VD*+6x6 zrFjmR?E%;AD$6DKMI_OYe;YgKHU&Ua6wb;v_y^pnU2H+{h!_bmP>|Fpc2su&qILJg zja%fJQ!I8TKVI6SQ|F^S^<#EH5qsT8m3ml3Zv0vHPEJ5FZBRvq6jz;{;P|frpPq>9 zAR`|3vmms~xI>{q80t93P}xhG2cdtn-^UUsg66|K*(}tHLEG8k0Oa6GEVn-3&~aM- z9xF4QaoE?^mOoDc@AiJRmEL1Nw zpQS(~+44qM1pH6O72~irg)otfq^ICT?B(D&<$earbWvO&!sYIDBI8b8(gQ489mhxC zv0M&gU>Pggl4G_)@=|DA(a{6xji<>VSUub){MRB;$?$hpyd zAG9*NBkl6V0HY}vM-PLkAameHh5zPts({Ql)OFDp0Osmi$`=08o>A@ez%U;^4ASx{ z5icRxW36Qdd=UA(%`D_(V5C((gJ^hqC@QnmZ(L)%edgZiM%y}j>F6*H!(!bwGbGPs zzUw(w0ba#Ef7PMw`7rZo2=n_);u>gzaezka9cT~xKAR>RMalF0~s(FV2 zK#A4XM(hlwT@A_&qPedT0hXKkencaF!cNGqrzaPnPh&oa1PXjKC*Opri@05eS6*-( zx}20j#dax7t~Ue&jJ9@*6cpR1SXXEBVR{{2*C<>M3u!*({$OUAFm!!Q87$br6N@Sx zl%nig;IX~YH=cA$NrGuRYfghD!@Q0`a=(DENxb%W0%Q5XeN0}{`@Eh`%le(LA!Vaw z;NW9erIv|Zcfv)iL`0qKpyC&V9 z>N80;;#SUKXu(A*iap3kOi>|2Q1}V(-6h*;j}%HQ}k2f5?wZ{msn5?;%#i?*)1Ll`9<8ux!`!R 
z(RyaT=!K9=w!Pl^9Z8S7i$J}vXMILH>s|y8i`MXB#{$tdF7W#W5kLgcF`#%Kg&epF zqE=g_G@Xr&2a|4c#LgE$kz}-BEJ+3?cm-eATqCCu{j3$DBk0u(GYET!P+-_oA^zAD zCFo0G5vWo&t?FxQ14jV`{zvGsh}B2G6=+N*35b&p7T0W^MvkN7!a^&ZG_B9Mw1f9< zCqRGlM`qIEoFS`aapwdRFA=5s2{UgVqbXXRR4iAtjsTSn>)B5l7q&g`hddXr2JF+KPQ;ju9luNf_YX}NOVT&D_RbZ zoh3#cM%7qe^3@<0PER(8!&Vi=(1@`Go(DC6);15LOk=Z)P-;x6_ZWd_`Vb;Slyws^ z@VC^Q*y83wQ1ir_i+XrZBM90s)k&;H(v5YPb#R5L4LXk>v-S!6lGf(?@h^%1aYE3`K&qZlGdiK3pA%7E-zLX*Y*rI5eDrAg*F%k` zlsM7mAOlyRWeY#hbVyXG1YPEes2~;{5V$!CU=F>$`zNMEklZ7v3Kyc-+;Y+chdfzD z%JK19a!8#C^w^hX@Fqu?y|WhkoT3b1)fAQ(+msP`{&zH?Ap~VfNGwDeG|zQ?JQ)aX zYc3Pc$)8Bc?;D#N&=apXl(ZvefZIfI4r@g>fURfvqU8*#`pHdGqr?WegZg&mVVIk$ zc-Bxs{6?TP!FptNRbinW(FdBKqo$F-s6f%C3+D zALe8?;J}N#dQ9AzR6p0oF~JsudjnId=U@{XpmU{+-q3_WeJlU+WVOVUh@N?5Z4PJb z0KkH=g=z@1P=lliZ-XGgUrJCD;JN&d%BU?D4Q|lE#t9-#dFJ8@04`bf(t1fD&$<(^ zL<@YH2#>oMP%y0y1)TQFrd66!|xq*5MTo0!z;WLh zMotoDSO@jLW?bc~3_QYpc?^Z$zq*1_waV>6OCBP)!-P~cK;&Z$ZI?eBCU9IY54JXW zz%rH0);7-yPJ69 z*VfVcyo0DD%-OZ|g5`FvuSa(cS^F$Ulr+tkXBxW4bsh?FA=lcfNJ7 zja=J=P=~7+4*fJQEJ02iP>(1}F|&G_bD&`ep(alZ6M}OdehbaZ&&^jQsj11&+(u#O z8fi3c3y=vMt0U0u%uk7}aq;3zH!4W>o&v0OQS%u=Rqmn5$Mcaybm(_G=^Kn_WFI6b z(Ht+9x6BQ2f@qD<8XHq+y{1y^^if;$Ul$tZNnO$`h;TP`yHbk*J?H+vAO@023VaG8=8SdOyfxJ`<1{5(YP;KI7)XJ#odY=m; zu*os;WfQ;{<=*kSsqnOrxu0Qpw1JPdG*YRmTc^Z3l5@}OeFEHM|iT503N1!0Gp3n_TCk*UiVx=ZC}QL{NKCp+Kr z{1B#xEjb)9&L(PA8-N=|{dB@%^p9QGzFXxKIayf5EN5}VH@8v8o`W*Gv4UQP> zq_b7hek@^ute2mKd@}fHq-4N3xvf8CNM!@IxG&R=vYSWvANqdqwLZtd6-$T*h%K#1 zB@q|Og-Pcg$~pQRhr1Y6#D@hjD_XqRx-d|z;;v~a97$jVEZrlPU)ze(UPXbqEL#9% z>1T~$7qvEpFT1k$1t&;I+6LJ_((0`nQ71v5^*1TUpp26=c*#lJk45l4tKI3YYAyKP zmdl(!s0=RKkUicK5-BmL(PeVh+>8y%iN(+peN*THj0wRMEZ}g8=!{hJ4IR%um(lPe zSO^HoMh&oX86~{c)R4)wK8f`T<0?q|cZK8uYq!_+9#D_Hj%MievN>$flPv-ps;hh% zHQ@ChJ=7KQ9yZMxP*d+DXUc3Z84ego= zsRe~S4GVv7*7UY5gDW25nHQxJ*<1OSr2fC+c2T-0u6WVq`m(Q8!w>g{=o_)bQoonHf#M;KD7{W z*Fsv~MAHV05#$oKDCZ>}RV&HWc7CthFIv#FM5@jPp`4i(mE$9r z8xkMYucyRH=zl%+jp`@l!*sH5Z`!HL*mbr9^gF5k;!E(z7zF77UCG-u0Eo?bVKD~5 
z6+Z0YS3~jpx)WSpB7?G$ew8Og&;p`VxpPJBD)wdl96(1*r$fC@dFM0PVx2gGHzLTekB+ z=cDvN%me9A&$$q=)lchbjKN-qJ_PoLjI8%rc^g*$ZaB3)&LWr?{XfZ@WFncu5$G2s zvPZ$^FKZU$RQM|k;Tq5vEI^aoJO0%<9$Ac31g8qx;!)Bw?@=MKI<2dVYckoN`(~ps zxLn&1Kkw1Mx4~5&l#y@Buep?-``-~+gnU0ygE%Nab3oPD_`v3e&LNoW5p8@=WzRbB z1Tv!b!aQtIG36mmOC+q|2J{oY;|3#&Ik75f$ESrq;-}}wvDOj2#^fU%h}4P}?SL`x zS$;_~jSEl30&ACOf!;Pcan&oeMH~kc@<{08Ijt>e#`w->mq)Sp;BO#)pW#xjcpQ2# z3|x)yMj;yR!HJ6swJ`}9Tt!?ehN0)P4+Y4ZS=l(0op>wpyCzA_XpWWQ9ttWfuI!xX z_zKGexQlOX_gvu)%6wc}AH|2Y_cc1uh46>hlte?Ux{u?9^Ff6=4pRS)?Rb`6K}v7{ zNOALH^?6C*ED(_*LW5?i*`LA-#dQB^89x=XW}Sp~@jYuKcSPX{b1=Lqk(-fdr{V^z zsINNit9p6)z_VO_5e=tKR?X_|<4X+5k2-N``!r5;Hs;Z*MyVLoz3GY|uM|5IDi}x1 zCAn^$;^ZcIgZMDTm2=@YO#I3V()9x1@uh-6U!M#=uV%LJgsY+H z^P&lcpXC_I2>jSU{_^II3iftCz`(9>^_$3TUlW4FbuR}uEOqeVQHNZ-{Z7O*IO$rC z1Q%=ddqwRYWx0=? z0M`{~J00>`!3n_fpKL`}K^_l&HSrR-j zn3^B4ycypU{?1K+u0VA*9|gS|f{%6R_h~ucCdARiD-jrDB7@w;i2FuK9mZ7>jngTV zbj=Th)G~P)YURkJ%L7vku6sln4*=D+)C_k^%TKDcIMSABdEuv2xga9N!An?^PMP?q&rzu1X)((- zDtl(jYp$?h$aSc8ipqK%VFz({gQ53Wi!l^^(S|Qp$b);tAd8Gu%~>WhLxW*mxP23b z_hZGheQi*--Zx?$u8_f5N-D4*;3qLyO;B%(9vVD00N*?1^%@t+f&WvYMDE#JPPQPO z10_S0%A$&HGP?=r#f3|cN-P?EN_gbZ1=J!61}^d7L$qIGnyL%Q`|)>{C;j6+0s|BR zq_34GcBpAPc(5h>XpS8LP*z$V1Mrp*F3xEm1n+mic!>SlV=#f@zB+fA*-#-tz)#=X zhq5ZZfC@CtNW`BYu903P&7x6ACaumWcG1eQyQTGk)}AtvQ_||{J6YV2UvBaXMG6ae zFCCWp^W`&c^;Us|;FhJs*XdwH<5vy^1-uexZlDe5bHsrS4sM2*rswT*=v87ii3c(x zUn$9Z{XRGV>%G?^b)~(y6>2ym*W`MhZvh&7HP&xJy7!UHTOMqf*Kx}hnE0LCcZt%* zl69jH0h(}xnWbjy#T$922*VSW$jHbKq025wrWu3~qBHrF-a~1UtRL>67Qs_MQMZwR z$UAu#eLl zWK_uG0jLOw&B~jp*o!vR7tKm)@#0@_vuGWDdc6Vc^=Fa*#2>j8`>FvxXF>uHdV`bV zS-h12R(U=!P`lX@29a|}*a1aQL}o%Y+C?Ik79%0n25)(Bf(DWXy$y8@0uHFltQjT{ zCKBm$0izOiFYTgjr3=Xp%3nT9k@h7b!RQ5`H;T8`YwvnG8;928vY2LCA5U*lAwUw+ zu2Hsx*(Wh;kbugMs{lWdqc~Y3kP<18Ff`jDs&^|;`#>gpe z06Molp+u~X0!)ED(8|;f8hx5orT|hNiopHy!SjIT0sgDwt#zc@Y}lFK*WNXDw9gtr})4?wA`eH-Jb#aM|LVNfkWdW_8`lz7hSK30)Xh-OhEl%zUs;=HOFho zi6T>aXbJREw~9akP#Eh#J=7T31>%*6^17j@qO@uG=ibfyi)lpWtB+@~8WhTacmuc= 
zfdTljTvwb`m57lB`f(abqS3=7SP1y(X1Xa6zxZ%uUx9tG$seFCjkk7J2|=a40E+AJ&04vnC{?Hu<1Rr8Zs`pk( zJaf>L5-r=g*_GXB5sK?C*mhI_g4;7y5T#;ek1smCs%G4?ExLiIW~RQrWMq$awffK&B9-6pqE@hM#ExG$pBfktgo*@Eg((6aeC zB58;dx}QGh>4Kr>-N9)!43asXPITY4ELRsY;|TdHPSPar9z`Oz0aoH`x3{cw@U3-C9|U5T z(Fcm7AK$pQ05%}9pM@1zh+`CS$;g!@X+xl@0itcWy9ciGdjzs3$KI9y%f7S@&?5Z` zuu54U4viI}d9_2{E<(($wVUjCQG0@~P>(IAT=JH$OLLG2{DC-3d5BlykD9D*t{*kH zlMFpYl8V8o@~UCr&A4S(jlN7E3e5x>Yo2sdLw2kOcaDaI8CLz8T!f!vl;+o~bO#m> z7rh529@NG{PO>uy~?~H9Ch8Z`bs>C`W7HO=LF+m7sk3!Oc&$bi<2ET77=vD`@Dep zZlZu917Hy7ureIaM5r$yvGU+ak{g~cpx5<-NSF}hjoaLBM-K~MUEEK24$&6IZ0ADx ztsmpqF=YV@oJc~@3mf_CDabY%^neTvL98rYdgT^}8v-jJy@Flh-%l#0&}tzX>J!v~ z6g3`9u)}c}%>UD*baI-){HtCqQl%LXRJF~n&PW+YPM9s(kz|hQz*x(q&%9SB*xXc+ zJ|>sy`Yk~W+$w$#WFawDn$|~rx@@aS`-2@hE8$#xglG$?nU)yc@1|S1YEH!f?+-ZL z;z;DWWJ>_i>a)&j`1^g94~AO6Qo$k;X(hvQibS=H>W6QVa_xquMaGEH+lUe(u90q~N!n;K4qOJ}9Mt7FBOq zH3$-0FJJ|%;f1LQy90o8&07ZcgmD6DVlS)QDdI$UC;^&ycDIYoD;|NokX&k)okK61 zm<9yrR(DJZl3}TqyB;Lt0FKf z9#OU132YH~@@PilgNcCT0~9_#EZNbY0MzpaY}oAcLB97uBqAcMa8p}3M6Rl+e@}kT zCih%dvT2s>8Y@by#@2K|9uj(3%JCzGqD!q=FOmXZ3%U5tmQ87r`F~-aaZdZJ$CErx z@Tj2BdFFpFhea7+Z-h+v#yE1e-jJ~fgVIucJZ&(Fp$@PHuboM}aO$v7QN2?NgfK+H zeP}aN1q683XGyCwQt=ddSR9G3FV6O4jCR!=)fjcb>+JoTAiC0pha+bGD6s6{cophB8}Z8~sXWSf*|1<2o^4Re{eo!8LY*`wZ*pxCF*x zDvQSz$uv^}Vk*~s%K%+KqQBNM`8q1mrY-W35@NReuUo0W1lEQgaL7;9Mo`6dOIZsb z(Cj?DHDCle?sCR9%N0sbx6Rr(M00yu19R0qbDB~RjJv-@u`aKp;Y)Td zy6CYpVh(A)hV@-!ictlJ^iyhh(JLFFDxY+VCp9Xu$9BaY5(EHrs2ELd&^AO_h6K8( zqiuH-@R;kq_^>#P^5p$X=p7^iX{Swc@+PYwemAT-Tupm+oaZ89D1vmHMcsA|z>THG z82TTo23tWUAv24=klolJN_cEHXPfdUgwPXW>+d|Z@!8brV8xNcX$k~<@5KhTvB9?a zUa@dOtV62U?!v|pBbkXbhP7TMwq~`X%B&s58%x#-sF3Kn_Io4%c)e1Vt~wzBMs_pM zoCXc(h7|Roi9LR4XL^w2$zWl*0ovNabAg{yb|3BmX{OW_mIc1biB6 zLKEb6LAMz}GyEY|dCKGpP>Zf5tEMy-1R(1xJ7VGqa|jusB%ojnb`M4RN6PqTJF;O!fDw3yvW zoWU6?5c?~;A6PhM_>$*EXWoZbnR?{StC5V_8K=JohD z@i3FArFAK_@mbniPyzKiRz*(!7l6PLRoKPlue+b4^#O3x<|Uw0m5zpF8KJjPX;}*z6szH(m%bC())B|A*a_SjF8MBjS_L@1%vSd-^QHp+lt$CheQC8NGeD}gbFQiK-G86_c%PDQ~=d!h@0Rdcd5Xr!Tn 
zC?95Nf^o0&9jd!;ynLYq8Cj2|uPK?d-%7yZX@6S7whaU0+aQ%XljPNe2vGe>oP_k0 z5=ldk43-$<44uJm@25Hk@D$)MaOF18pr7za2_TI22+~SO>jcd{Bj<^pLW>x}{Rlfa z*SRpqK~mD@I(opj`c5z+G{WFMgV12mE5LQ!$gX&N3TdI)DHEG?L`mHSCzxO+)8zrK zHa?6G=U>(vDo#Pt#Zbqm;gR)}6s1sE4L?7enhIqfDr-{7XYNYueRH#lisIGzcd4di zFtA?jqn@xdc%D$6gz)fI2yQTWCbYjQaCcy%a(ls;ABYG#0vsX=@Ixcr$e2s>VU{e+ zHy~Cj>%o{ZUkS!)z#q@p5x8_oCx2-*zILPKd@&qGk~XH{=8{4%h_(otKO);2eRUZTliJ8m1LrV>7J|g8oJz+~eS+@QA~8ddK8O?lH?qU7>>f zlW@V!#RA|eeRd3jjWw`F%5#j_Mk|(~@4G!qS>JN#6F78`H6+yTdXK-tS>f!f5!?Tf z&b2^2sws7kG~j7wxODO5=OG1!H(fd$FjPhfi6_?>EB5{78`wt|E@INYw*LR*i87X- z6)S>_h^_9Dg3UsX8Wfbj2cfZjbQMnQk;NA2_1OgQLgvc=}k+*O7`7Vt61= z{L>G^HOXh}9%RR2Ie<;~yx~Zll-K?}b#!}Ng-=bTbfFl^Y`#JWMm2a9m23t|pijb; zv8AQ&2Jt5&le^}kI9}zZAcq^G0Ea_3Qb#1i_IDWS%9mfsvNx?_@v-1|OPT|zJJvwO z4qX>!!KuecmVo3}dtSp%2>0B4!qSBDDrJDug9OnrBNX@ab8z6AMSO=_f&XkkuAYaF z3@yc^5A-Jx|6se|d#9!Am`{cb^pU>RB|{}ith6w+7L8S5w3VC$7dgf zR#$A1oDMu)%97}Sz=&G%?F~oJ>vToAv0>4}D_j}=%jTh;S)$*g=KE2z;R9;e$u8&r z<7EPgCF6@>VYfA^1SA;|9twFY`rFl8LOYeV(&CyY{-AM_&NXN#rPm5(i+7e}PH{Gj zA~i8V43oN(5sOmn&|e8gKCcLEb?k{)f(xq)(;K1GV;Uy{H+-~;Vta`HzRmOx)Otsd z8rC@cV&&q)IF=K27Ft%P8a2r8O=T~FMR33y0aEfDswT?g2!3yA$y=NQ36-n4ia zzm49Zu|jF#wD8}?e*f*B#NpPF_wWKK4w_~d2esp$tq68_^jLS>bf^28ll5GBu~hjw zqBRHiY(Yi=|889=`%w96WQ^Gu<9rfbX<4yPXTVB%4bRB=LQ)l)u6@D^M4s}>F{AiY zv53T`$!!fblWs?ACJ5SLaFi30YM}EW-zv@tht8gd`onbjYW_jTr~)0PP4SSIiN}M? 
zNq=z@@S)f3?DBELbi3G-X(wd)vqr4}h+B&m9*9nCiklN|rx{;5ERX)H^=_}pRp~WM zhEDVS)xI*jZe@{3ud+SCwN1$HK0%IaV_+!I_!eQ4#@>ozm%=K%7V-C1b)XiB1R`-j z%k!OpyyL<(kR8H|_*sg-1ekT$Cxi51QJfUc1zo+W=Zij4dwd!Ps^S>rF@J3(RDm10 zx;~OYT|_lImBhY@j!ypcJe(tn)*^W9&pt?4*!&6-@$d(V8Bs&)*~lvGAsm7lGNgVl ztR6PU!I9pv|47+I&l6`d>s-N4!Ertf*Zs+kCfFlB03{4lPS1Rkf_ZZ7keEGPi zz8YA{L_|*yrhOg4;d?AO_EQ8Tpy3#-wc<8&c5>j^TaYuf+>qx7CLdhe#Q~9eg!VY> zjqMW0Y&tVu6G9_D%VY9^0$n6?>p46{1qF)M&4vbae&1AUcmbiRoCUzDEnY%pjA88A zdmqlFXm6>T3Xz&1_m1t5&_9r+K_-ZDoNi@{9Q!!D z|0r_$ONVZEDIR&m=WTS2%M3XxyV~lJQXgT0u7rS`44n21YCpS%;JHX8>AI%NvLW#v zPMkR^$`<0qY=(uKnZ00$n8bzBm&-NV2~2eBJ564`$UwNVgAHC#q`r^1sYA2|@|LEv z9jnHBHvu@d1U2cpL$3*5eC9Z!i9j@-N3W=VE(S{K5~c70*u2EAL}{VJ`Yr+U)rpgL z4plMxN#W_UB(*Ufa!JgU;nqMhy6Jkm5wT2%?IgY%#6l)j*PkYusdhUVBo4eYq9W}T z20#}esoT7vS`|cD{J>Ddni?8jjGc%)pCa0 zTp*d8)3dt`smTQW28ra#5tUSnUm(6>ZiQq1cl$Aq8KDg?lzo&;ch(z!VD!h|xx@IEyiX$_d1f`HwG}4`D<9=Vl~ZqJ5R?Gm z<$N=`ho{p+Void<94=E!H04&Q6{7LMe@bbQYiraWdqUq-iKjm#)rPZ#wRm05cD@Ut+54nHWNSr2-S+K3yrzZB`xYK$DEYO}!x z%ffInhmbyDq;-lJ5@ll8SzjdJ+;{ffnsqu{Vh$ypT`JVqehI=RH2tNf$-=N;Z`EfP zy4CzCwwM)|JNen{AJB2ov-S^S9;$ORHMmAPTeBHS`OA25qRbec7|q3(9h>B65-88j z1-!fDVrE!@80>R+OabVpc2?7}`B)30D-H#oA3>Xh8m$5x=rJwxUNt0q_?5`d zp7lzl6EB`o4Km`O9u+H^mGyUp^1O+phCd+*O*J>P!wj9`2cfb9T2oEFAz2=yjU3UJ zt>}%+QF16ne_5w~VHa0@OFku5BW)3e!Ds1!qqp_t?fJle2L`M(@F{*O#y^;^R(Td8l94 zO2I$>y2JAc17WtGIu-FzEK?f~C+IIo{-%L=mH-ErQ(7)m?@VaL1wDe=$X_wizHZKv z=n04wlzGI`kf;Di(ujtnW+CVSIgmPSei#5?spIPtM~c4~FuJoC$1PYN#TyMVDmcsp z>=uCjirVbYJ0;PE=5P)dMjiUYaQhl7Q#RKPEa%9XX740sSpRE>h-^I?YNF~?%JMpb z#vOqh9MIr&WDb#uu)0qcLJ>>W+BaApJOeGmRO4rcD-miRohVlEzih$5CG=Qfw-raC za1%T)Jg_*wjlnRF;#qj7Ou?XHuvRf>VkOSz$9!T5|J!B7y~ET%TgV!rRWibUHQ@LD z?*~{+yXw}VHO9`Vq`r|QE>0pQ8Z4juA6;jO0y z4FEm!(Y17=BY41dixDMFHU$x+^qx^lHqF~2;>h5R_5{FgZ4KWU@Nj|9KG}on>UVtr z4SKHy2G1}4r1R&kXao;gjb5VNcl26pFIzF@Ant61gXFQ*mp;PXi)4X4MR@SikHRcL zpnoS{x(0dh*|#Hlj)=3a&BFH8zG{$QqV8Ogz^bG%X|=S=0af=;76MR7umUm%q8J4b zU15cuT>L@W*V#n>a0H0G_`lb&KFk)?8X=f|#{+<~*tpQcHFevt!~u`b&|2v*rv*m7 
z_g&1Dz%Eb}{E`@1e^#8NoLZA@DD4@aS1!Ys7&aT2q$j1^rJV?p^X{72NXKF?XLR)O z&6rUbs0vV{^;Z?bZYtGh;|G~?k}j5{TauvEceh4*-o`4{y~nJ9xQ(E-N6Cv@VTpIz zI*g{51iJ~6yv$CB2=<`6(AAymV?(yR9`?pdWtd6HyQaH*^x(P=J=@=X*->x0t>n(EXpKk zLuBJBg|id0M53UE9SG{1dXU>^nf26ckK>J|Dv3%}`Dp{1S>_9z^AhA(KJpvzFB-x= z)kbF9TRtOx#ZL@wU_&lOHfK6+YSOtcIGRnuqmV@<_*;(-IGQl_|hsEkeb(ui0!En{I!Te zSYqp1<1W+5k)^o#?qq&W)!7b<=&xn?5Eo^T2POjchPkGE&YLslQy~;M^R2HN53=ao zCKN~{i`Pa}qW2I`x8XHA3H6vU44Ugwi)t=kr4!zBxga)saGV}krx`{XR1m@G@fd$l zyRSazuzEVFbTf-}8@L#SI-*_7GT_WyhOCtW$;7!DpUmi7hoZil0Az1~16ync66`-S>&Zo*yn3;4+QKprSFqtqAS8jEfpE9Ou`}=HKSI%I7srt;( zZ>(VRhu>UJ1W*6Gs-PF?9hZ^JK-+pvFl=ArWzgDRBPm;4iObIUE`-(Vr&iz@OVnrn zFWtRgj?J^fz5=#+ZS9A6Ha6?Z9M~wz(G&rlf8;1CMJAL`BR~W=C~gROXgJ%%FJ9T8 zOaecErzvA*y_kh1YOL}3>7A&aY^WOkVp6h0cGx&b8nCc&{|96luKcbjKAB9*X|WhU z>)=Ug=e-MB;KqoL0Sar-K+?62LP2bXd^?;0k;Vi*LA}FX$Ugz{-zX;6MC2bnG8e?= zlyUKa8UcI1l#48R+`&h*6#<@A^*{LNZ#l>1Wa5=2&-BRFavXJAMSdw}Dz%2D>uL^*57HABy6uA_+n&=25~49) zpFn6mL_KBjFi>=#uWch>z{nlXf8_lkLZ3m~!KnuxS}4EHV@2_m8!`ZrMea<)c|FSO z@o;!?q=6(h)mw9{u?W!in4rWLh-|GZ;ELL?MsXCICw}2!>Y(0$tTV5|+0Pm0PGBw= zKDHQALoISN#=>gr_`-vv8@PcKv$v72i3R}uZqM*kNXw5B2yI5zdUYWRBnF@Ql(YKE z=Kf{k9Vn3S#lE6&C=wzItA&ShoQKn%ns*$hZ6fKd__#?Hjc&}nF(o6g-0Zqptjov` zP~*$~Qv+BW`)yL-h%5K{uXADPx)Tr{?{P1-;|K+grF(_SQlo(HT0ILGtgW_&I6$;EpoF6F!IhS9LHf(yF&qbt@BBtErkkW;B! 
zZTzOnzMVk>XM3?}_}4n}Rf;}Q zOHP|N2OAlseiq|61UZ^M6Zcf*8OzEwU@o5#EO|L0!QUnC(eUJ%r^|N`kMOF`aX6kX zWy@63F#EETDlWG%m4*V7z16^@lK?&S#J<+gb1g~Wj2(~1{WX!=@6#mlAJG!g&l7A< zn*Re#Uv<88dN8fbr0|TGWiX+Te+7*Xz`7ai;OYGpUWChVA}_^yQM+;CH-@a?7-!6X+YfcZ z$2BVS7Geha8B%OaN!z^@cvT&25Ijq>?3Z7!!`Kw-MuDe-(cP$z7=cbPZ(1re{;7#B zUKh72<_L_Zk0F&Ud#)+yB5S*rL(N4p3w9xKLFg~ zbgP5v157mE;dMRzaKH>3MJwx7TdW2;Yg(-21E!%EC1xAddMoTSb!CcNaziwA9^Wq5 zgNB3@bCQJ3&&O0(1RH^M-%nf=1txG4AlTM9aYj(>Z17uL^;D`%4wiDlysjS1;lVQ` zGI_u*q4!(JW<>|?xgm>sN#L}bwEq|tDh#n&gT@gPI7`6!KL65RZt_Wd(gmp0CBalz zD{6M0HOA5f*u!EhlLU`w0iyuoWPMX7WCX?Si{6N|BmJOe)+lZi&KpI5AY!3&M;rHj zK{j8!v?6@wGU#I;%~p4cWA;o#7b0t>O^hjVNcyt8;APUbDC=b4F(8pRN(UrzaZX+= z2Y7UumAHVkBPczaElpHiAwo)4vr4pRlEx=92^e&Grez96AFwiv zRS@9;m`p?d%Z?Ctd@@LIv7_{sin1F5PE?DJZk!0))G&oVQ;-b!(q2>oSdK$l_!y|W zTfPWJSh^3~)pM&Gr`0CO0_Fu**=7tz7#XO7q1P8RkE^6bEo%P;&8hi*L5O!6rL)s6 zUn*83gE|iV_2bUD80u>e}morM#^mJ=cmV`j6p4!`CkZV4JH$ zg3CyE8&g+UXFl@d1>aH_8VKKji#`I`ieH;V7NNHh9Pxw{BeTl9$$-8geIW}xk(3?C z?I0tzn&ntyDJqAo%TEFGuTnNh5STug9szw5b)#G6LSi#>$M_|x4Mqm&3jcj%FPMsI za)-$z;c)F*o3xbWbOV&_sW!wmO*7#Gg42lA-DS?4{wRv;if&(_1FVgHjcK}ju7%VO zGkhU3n&|9;YQ!?;6CxmnxK>Y`ws1kzec>N$MnN-H*91HZahJde=@zgNg%cwhe;kYgv|DvV=%4j1S8#PP>#uzy&9~M)f()lAOinw z6qGbizg`sPjfu-pCP0$?Dm_7jDrDKP^m8T42b3`;EkQzIw&e^BjiY3u(%U0$Qm~(%6kM&9R=jVms% z){Cfy*nweHr6^ov3*s-+7L7Lm#G%IYt7o-)0lxOF5qkaZ%?V7{Y}fqVK}Pn-W2DHE zB$RLiZIS!%Ldg0%&#CX=@DWG)4!oJ1xrPa3*{g`j4ASuBchlhXMB4x_NGD5nWSf`$ z$HepSWudN_$~YaO6Bwf5_CM-J|4iz!(xPSq$C%FP?>)2D-oVS$1(wz%(zBi(Fjbn* z;SBF({V^-b3&OVtxVrO({VPIrbSDiEq|YtNr;;S(XR#VRZQxN%)SRZ?t3PMoEwNR* zOTE!XfKW5ps&x(27{bHfopfL}4#g3Gw;_gKecWoYn+=1P0VnCCLOI@dxt_v8SZ(Lq z*L)_nFp}C>eDpzV*P8dGkiZKKFG<;B%H8-4alPu8<6PKYRHxLhWrD#0pkG%lTmh{P z+;RAlHRq)uv!6m)_9$NcGCkZayidU#2M48&Nn`{|z}pH^bdW*}x##gR(GnCtDmBXN z-o%>#Y|sPBxQ%nX&T*nG*eB3IS?bH(!hIbBPjBsgA~App?K0bv_>?jRV|gk^c;vZu zS`-H$9{A&%otbJ}Hh9x9s?hwG0oeh`U^jjwL`NvwaH~d5AWwB^Lc!=cAqL1JJ@}4v z)l_=CF2E?(usZM!;jzX0sc7e)M`eRVECD~E?;I)K6@^I--Yn1=X@fI|gxZF1z-bk4 
znuRs`(wC?_I=X2R9GR2Bu`Jeb#LQWMO`FELws3(Wz1wQ2t z=;GlAC;aoo*~!P?iTvlbR-v6njSAYY|Abrf{+y>%m zorJ1a;1pEKib5AjTjAeOGT)SsW(-S^4Niljy*oe zXnN8xBiT?gT7|vWd_V+saQ9vS1i7hep&3X1*bs-G0>n3pRs@1oV1F>B-OqSl2$g>cFMRX`lWVeH%^yy!E_h14WjG22u#CI#)5w@&JngUQ_AS z^GIousM8J`h(to~Q6>{)WMdFMjd-6V^B}t`ha0ukJ*=1S?qyHQks;k2MbylJXz{J0 z-C6+|kp0RX_vDN2#RE6^Hy^cXmc-WQaFGo?AOqch_KS2t z8eEcC%gKvEl1XR}%edN4;zT{NB#BI7LpA!&uH8uX3B+&|IjOlUee478CJcRQgH)9)v2?R_j ze8z~2J)xpYT7_o~vx^396h0^w_adQo>5AJL1iN|A>G+hc^@P{HW$r0oZpl-@w7dv* ze;rPy?w?(=Bt3}}qB%9g#3R`OV{vyUE|=I}Hq^P)mZrFK%PhKgugt!dfNsmMA#~OS z6Daq#)|=>s%>N=nAGnk(*l3(KF#)os%6UJ}9@5BA_=c!++&STaeva4@4!u=q5*U>< z1XmQz+-{iC>Ri@8%)KF9mcN4&|3tuBs?U@^=tnL9mCTE-n_ zOvipf>3?kVE8}%H9Yr9n@^7#^4w`zjU#ND5{C)$ z5OS3VOoM!e!)BD))m9L}wfHqRBs5{-!G;M@y-J3TOSnI2`m#)(#HU<$OB@$4h-?~^ zMEwLJ2PH%AmU|NbMdVl#dJ$02$Vuq;Y(@mFk1j!v=mfh460)-h=GF){PfCvS?2Kkm zl!q)Xn}4FqD;#JqO@)FJlse+)7YkAL)vy?)gg&Ff=GK&{#ZoF+ruC&a`6&P(vCI6T z->(p;8Rfz7-T;~nzL()`&{7~Jf}0%j$?2~ud~B9Govj?EpZcu>g+Y8sGcr92tag=j4|B`+^LHOT0+zJSFq zP@n{QSypNoGN(l5RJ)jXKCMJwLE|=%I&$%9e<&}$`020v=mt-7KO+(0rxm-QkL#`< znP;W|n-!S`c^K-RB~%yCX2HCun> z4fK9eId%;g+3op@q}BFpryw_e9Ov5jB71oD#KCz1ndU|)c45Tzl})L~IqJn4TsC_d z!FK`;}&Hoi)}e)al+r)!&BbPGp1jn%`tG^#>i_e&;of%ALcH~{) z3dy1lsRKW_Gum)AGpCSI=A&x^gxvn_(WgSL@?lr@O*cl%lEyOzRCC&9d2{PVqV}a$ zgN=61d`2WCNA|EWt=Gho9N6vV%r!AeoOm*E;S?wifDLdFqWbW%bvs)%i7w+*SJS~{ zIe!h!fZ=%tpauv024 z<21^0P_PTPPYzYtqsU6zN$z(kcdNH1C1w|S$SFSHd_t- z&-hI9(W8@rL)QW?HX-uN^9^{goM)C?$H`t|C;_Cm@XBxnOcUYZcpm`pA@aUJOWk~< zD6cX*5JQ?IXnk-5p$cYQVsYt*Ce#93)RY8PepJ(MRRZ}GMB+}*ZtR?esUk@MLjq0I zjOo^LY-A@!yij^Sq?JB5Oz=pDRB;lfO5LGbh>J&=?MC@xARGe$WJn0OadI|*P&xNX zld+aL#fZui>aP-%RwafgxeTTGm9KtY6bqq7&@WM*R)&((cpPlQe;Nsn2dkijd1sZ*B#L zW2=&sunPjx0rp<$XlDPcO$38KFM_aN(4Dwes*j+Fb*|u$_W;<5Xi^lXzz2NoER9Qg z+m<2B;O`3nN|!n?nlJC=j|nnu#W=a6rZ&vsE5be{QM|ip%VyS3K&Uw_0+g+kXgiVf zYAqQZ&Xd`Q!?ECCo{Eav|A4QtrSU+DnSk?8ddm*uvAeFa=_13FMe8YuBW$|z|?6Q~}O0fj(FiR!`&Of=8y8B~?L$_ma&CjwtZ`t@dC!I3Km*!@L5(zC`$f+m8sQ 
zy-8AGI(PFJNGyQ!j&V)l{sN*<;)TPX3l}cu=B>w4ik(>#}4QsU&=0;dMEJ0(Jx%Nb_1?GQ&=;iTB6n zaoxq{n<~8F*F|-r|Bx$UP7}?$v8*xza~9G%168V>Q{W zTE$^f1O>{K@XKt>pkvN&!(+|Ccd6-=>@~5(VMyAVH3)N4!Z%Pi#Eb0rVP8NTTFg-5 ztNy$p-kLr+K~)1?TUr7@*h%^cg1Z?7!+Iob-LzTpu=-FBQWz6cBPualPG&%ik2C{= zMYz#q)Q+oCuxA(qSD0TgAjbmOH_$z=8I5tCPY#f`Ts*JO3^%5c`hdlS3@;IthJ@lm zZ`fc5e912E{>Cwyhi!wq==Q?&9;!nb`uIk|#Ww9^6_IC<7K<=q1D_lb<9lNYRl)_W)h`_~O@WE>Xj9_nl&k{8S)k6s zM+X%wCv(Lq+`KL%n|+2jKB z<9^?P4sFsyLOQg%CA|caAvWbg67MCJ*~U*s`#$%mJy47wX(_`o!DwQao$zu6=a=eg zDbuL5_;O<7|3)po8{Qu(K27Wf!nEc)mp(Ss8}*%cg#fL5$^L%_vHNB?k+|;nb6N9E zN=5GCgD|UON8cFR4lg}Yj_%u=VV!F1`g{3ZMfn2eyEE|j+MNn+9_EV8G37gs96g$g zF9aw%BP0G@3 zpuf{tJ6OkL$Y<{&VjG{%eBtWzuRaqVqvjy0Gnsi{`^o7Eq9uNW2JcPoTMR6FAEDM8)pkI+BfPw>IXi#{uC?tyKF@ZLs zFcGBx$`esgHXsyWBtoJ4XFL(=!9&w97W;3S=RsKrS_{)q)fvp^aKxi=C>6bIy4 zJRD`AM9|PUOt7%O1H#Z~L@W*w91f)Z6BG{w!s8K%z%L9<<08;B0BICwf@9J=I1nC4 zBGeLrl%knHAd048;2ouTFbc^NX(kX5rFj-jJIVybp`eY>eg)$&;PB{QpqvNj`X>A^x3la*81viTWG%O7WkO$Hz925oV)9{dp zh~n@(kOmirfxw|zEXy$t5Dx@m2GejRx{XBvj$FI~aT+2E34=rN;33Tfi^kzR%R3W=GaM8&9Oe@X5eJ1L zVSxr1Og)r_2C`rpko?1eR2&crp*{rDr~q;3CXdo65fSCVl%q6!^E{6R0>Z&OEQ$zu z{zQ>1lZNC$0py*Bk^l;gr-B5|Pb@ebO~b_TU>=AA%+hcm3=4(fz&tn+()I#4#g2soBVk!0JPgDn z0%uW0(pWesfPgHYSZHu?;3pOu9GnP?M#JGJ791AJLV*mQiAa_Q=qD59xo0R5ESiSF za1;kek(2|{Sm0nV_CB$|NFtNvKC!T%03jmskO<*@VqxK!Ko}?TJd_CL(I||4APf=3vMe5^vG$S#P|6QACI@P#y{<3nS7z%3?y=K(JJh|2Phg zq;U{lpga$yfzc=qi-<`>;)#XDVh>TuM;aVQ<4l9mFpLPx0)g?lLOyvY6PAYqiUtX~!90uz5DY{^+wPz!7H0xP^WZ!}736eWW3U@AN!?1=@1A;JX;vMUfsXdIqM^DLk=4+z8s>x$+9y$*3WAZRFy z0vri;U8&^}aZsx9a7-8&4ic9v416ffL$XYog#?AU^@(B;9va7_QAo(uADoB7LPBJL zi6Q|M7!){>vVd4!aU7F~!XFS!1djxK4hteQae;Ot<%CayX zEU-AAo>*WsFb?IBzEB*d;V?*89*7$#Gz{ed+6knIP!MS#3xr`rXdcf}0fK?Y0+3`Vm+T;N2^aV!u_FRf2uz+6Me{f)PsFi24G&18XdDy^9EWKn6c~rXI6RgIXK7HICl(&2 zLS(ThmI=**Sb;1aOa#kBkvJlZhBHAii9kT=XeP@65YWJ+Iiob3iKD?_G!BI35T!w9 zzCb*WGFcv;2nEfvU>=%<;xNu)0i$^!5Ecs5kOsm~VHt`96&@0WfHV*g%8vl^KsX>2 zz#Datd z@#1i3P#h5m2@9bfEEo<)Q9O)7!Z1-FI5dlfN0W@gAY7hCq=AcAp2Y$}R#h(cM& 
z7|n#i0ioxId7yz&$e%@FfSNn8@GKJv%hEI~kSvjj!ihKtAm&aiL==fKfq2-3qHt&y zj{~DnZbrjtfXIk~+z(~pXc%}f%5&U_g^98_lt)9x4@Q}=NFa{VI5>*}=(i_`;JbhW z&0sVP5RBx}Kr|M3m}OBMy3L3{s0WOLqM+PFyMG1@H{w_$b(sJhlvAWfJ8(Z6cA0*I5;p0rSU)m(~Z(BE>Ij4I_=0} zfiOsD7+5Tt=K+ReslZ^!9~9?l6bS0kN{B( z3#A%|gVA7>s676bwhhI8WrEG*DwR z5fsLLG?wvD917Km-MI?N}xX3Au!+ zFo{6Y4J?brQ6wx*Tvin1wQVJU}9dPz*)`Q5qZ$3LFRn zIaA>wLft$JV&}v{1jK}h2(!SV&d=f~4QKLb8kEHm;aC_XNMJl7QjCa5v#1Hg;V2H6 zGZ-a8EssXAh%_4d=Es?!0fO=7#KI%uu!vwF4}(#j2-M26fCFK8a6lLa3B-y7k%lM2 z!ZbXzi?dW%Fb@s_ARr29Go)dVP(hhc4-(3LILf0agifS^0SKgVKtLc$!_v4`EEu2= z!GZ>+L4kRACKAv`sNjHt9D-S3aVXTxiG?P@fd!*+C>9bdbE75^1*0t5a5x~G$npRo z&oFSQ-wka$+Gu1fnRafq;~0G%(DA zc~}6VNIo8^H?Se zgoRo{0mASgZX6FI!eUWKaDYe{j0OV2FdlF)6e!E1@kkmVY*-Y)z#w4~5xM8#z|wG- zCxV5tG!{?uSa3n3SRf26^vR({MDTL^O*Qmo%Xd!U$-gWmhhFI?p2xW z^_bqZo80x0+SQEKb4eYO5}DAg-5ry@o5}Yssk_lWACb0oIqmHUTl?smQ^{Ic=~-9F zSyhRZm6eip30c+YSkcK?mwxq|e07(4m3Xz6cK=9o^`6jBnQm2^Z1tM{x|3Y>s8#cb zQ&077^R<;wRezB_RgXMdk6L+AcaJo6Z(r$B$x@vj^_`shd_9TTkQUXQ6qTD2HL*a5 z%1?(%hPqGxt4e-qOMR+Oe0s?3@1&=Fl*dhYmQQ!LPxhwfMr)c#YD#I! 
zOlWFkmTUBX*ZtYVzt^e|9h&LH&H`ht6Z7Daq={AjQ6OCSNpIrNr`u>zy(~nm3oK(}7Qd5;qGn-74 z=`$Pobg45hiE}w=Gi|#snKD!L-2CX$Y{|0G=`pRzG1I6q)ro026`NGsj}p_A5R;t_ zQ_cGG-tRc6FB6F`xoI!)NiVmZ@G`N`?7DR4ii*ojZ;8~li^RsPw3ein{g3>aO?L0} zk5B&S)Q_*kyT(ZR=&5|EgsS>zGFB+fV6!PUwhqj)`QB zjP&iREvp^M*VsE$gZS|*_4Zzgp0Oxi^yb)d-SfHNzIQg1=p_PR%W=uDffNqUG&dG|_qcuIFjcBo8m=)UBJ$kc{OY?w}~X-{gX zONoX?XX;62yiESooy32qw7CDIe`l1RYDf5ObZ?!GiAe9KPQI&3?e6DG>bXbhmsm*8 zLqcby^II}Md-~Q-@;-FxK6m0i+D>fJzD`Q_Q^LOPy-p~0^^&J9Vfje7TmCv zT#*gAomiPx{Ye!mSD#RiPIt)^pKveFzWHs@cC(Q@+tSvZaQiazkSq`BOpP4zsZohB zv2bXrq{xe(M~92Vw@6QZi3MZqran$OOxj~5#a_y1BfP{y_G{AHBiSVuy#1B_l2na4 zkNAz&k<^t^lkAQzvEcTtWY_0F(m)zW8V3v>1~5X222P^EVp$9TUO=J0uviv{!6Y0o5|AbUiG;H}n1llc17OhLVH^?8g9lC`;h=%z z08t!}aFAdCX`F|7L|80~V`ld}7zLJy$d2P_uK0|yNtivv+4 zJdXt;;sAm0K!JiJo=C(aLIog^NHl;!Rbm09KsgS8Gz=QP~FERY2ffMP5QSKxUpjsp~r28{>>;-Em3hy_AEWem}UVE z8Y~V5BhqjlIB*t5!UKo~&*C&tKmZOJ8ixQFJa8T`Xs|dUiw4304-_Z}12~IAX{d(A zQ4Lr?DKK$77c2u%9%T`L2238Np#csWKpI8^95AQ>FsK1AcrbuLg9i>8ED;DWkH`Z? z4Pe1E3s`^w95i4oi$o%!U>uO;L192Ni-!VP9EcL(08uJ94bH@=L?Rzei_yX+BdVSRd=mUt~H~4UqiynGW)t+ z`ZC}0Rx&HaExV-0t8c8`BkX5gdgijX(p}=aQs!E3+C0upI=g!HI?gZEBy;quv!iRX z^0RuxQdhFAetWFdE;e&KEJdzWrf#lmti>np#DeW#{br#t;bUo1H>2~p2-9BaKAMG!D(`{m* zEW5I=?3iqCxwP-q%$nZ6`mDA5u^gFLuiUo!^AGFXtKUe~YSXIM>l-N*F_Q_=tj=pu z>C~*QvHY|)6MM~S87uXwd+F1xvbFTFI*mNF@VUO7y1&x8IGb3_Dh;a@Yj6;`RbJGa0%0_OtC(_mXR*a>hbH;%*xKn(+jQX(F#%P@#_1@(I1~$ zx!AJ!lNhnSx6GRsxq7kKjTFra)#_Rc|0vb0(9zWib!)k+t#Y+ix89Mk()7)$Ps>^B z{>jg*FtskR%#PYi6N}F&ue!UEo%YPCUrX)E6q~yavsj<<%}VzR`wJE66|0x)p6Sl4 zKC#raR+a3mJil7K@R8cA__h|!&Fro^l{&FtY-YvYb;-5O#Ac<=mHO3qWLCRgr(N1i zYF67_d0gp6nOG=8EVM1%rZlU}t=Ft&qf0C#t66bc-DR!b?yjzLveG{{xvM($e;)BC zI&Gq_lIHtU{#z3MAKgD1*}pBhzbCc7BC-EBtv{0bCuOcBp|jEX|C9IIQnxR0KOU`i zvSPpGbE=Z_n>jK6FD?I(@-rpBJt6-q9se8I@1K(Ie^c}RBJKV*>3(8ibDx)R|LOJ_ z$@ZIE|CL%_k68bkR)3dNO_Nf;`RVi%$@Gstx5?e@o7tE+|B?3lpEUoUGXI`1pO-x~ z&B^gSTYZffk@acu|D6!u)sGp;@RR-?m%I-ViQm6z{a%~${+96AmhRr2-hPwZp4#4( z*goz0UHR<^?SJX)pOgMQv7nxbQobZ96|@yCY$@BU$qqJ^wR#vnvv_r_-J) 
zld{`VvZE8SpVP6MldC)B6TD`pwuhDynZ`+ub zQ%TWZSKW;e-PyM9$j~|I&uz)gmYNy!Y0ptnk(u($oABJ7?3|z85R-hiT~U45tlNrRq4LTzLDv@Z^^xlsonjY)?Jm-yZQ0y zd|Am{lf2vc+24p8+i1HJNxSU{ultyuo0*(@nY)daTS>X~D7pET$+(;T@t=B|F;i7( zx6w$quPJe_>9%pdy^mb|lv?|KjcK)cNv}64O>qge-{`cH$+VG=yv>QT`DycCF1*$+=Id|BU$Ck@h*6^tqGrd6Cf5 zobK7lp84n<&&m6qshxXipSj(Z>74h;oRPlymb}@Sx|fN!SxLK%5_^^Dnj6WQ8IOF8 zS(lbsNtsP4k@@JDUCH=!>6epy*_e8H?{yzZmv1Tens8a2ZrP7)d6!J^f<9VdAx*Y>}1D_^v22L#(am2&9uvBq-MOzi_3Jz zoKOFYPIOxw#( z+T4`A#)Q4=biJJPyyVBc>qp6ZPROf{-Hl|tNC9TGscyu#=SX{qn54J8YfpG=hOKqW zO?HcAdf#kri|F;Y)V6l1#Ku~QU9XCywr1yD%S>tCd!>u%s>p8DN@n|d&i-mw$?Qw~ z`nr8c`pQrFYR0-=##^eNule+@UTEZ=omr@ferC7k%DC%_zv}vGw%_RL`*f~my<3(` zc4_5`mB#d~zFC-8`Agl4nB2<6hvlByT&bR|=i5iv{F;?$W%{V}tX6tjXGX5B-_)#a zS+7mZYRO8<%6~}7R_TcS%F4QAtY+_Zru2J!a?r>N2;2Y-?gcfs>sagbgHI$h4}Tl^r`w@ z%bBW1o#tz=o92~$+M2JK%D%pIsb=P7e`KkKs^_?7zh1BT%&2>`sC%UMSe}%qnoEef z^}VfkU*=wBO@``Uzm8hWSm{oF+SjMcil)`wdfL^L=Xjb}$n&K;^+qK--P+YR)H2t~ zcXC|+Rccd4B{n^$HSO)%P1193O4BPu)qOKGuU@79Jp4*C@=w?GU*cb5ru{VIBmKAH z?fhIX^(Xw)RipcCC40m!SEKj7QvKR=a?h;0El>8$`qkpw%2V~c^{4bS#JUdCGAGcHtcNA%G_kK)-JrOKcjzM?Iq{r z#J19VVm5PH&cuSQwsImTCFi*ka(X`rq;F>r%`Wb{b?aS z@n#nCmwwW2Vr6^jG3nl6EjHzHZFZq&-CcDeV<|h??k>}_S>l%WH9xhcmF1PHAJ=7` zk!teeQEGbeeK|X!W?!FX7OR=287fwC(`RPgZe22YCTE)YXC*RmrkAB{rf1eNm!(r? zvTMB_VRrFZu^L^buNLP%av`f@X0>UfRXZ}Sdi@(M=9YEWBa&j8-D4teAzQhtDjn_~ zll`97rn{0~o}yBpZE+u8;xFwb+w0{Y*Y#5VmwHxbmhWBaKiQ?3zm|H^TfUoTX;NE$ zy5?;gF;g?@Vi_tiV;(Cl zD{E>gt}U(dR;JdXVwq~5mZcIZc3ozvcj;?AGMRGOYH?mIV*NdNawei>sXK8Zmfe;f z(kA{RCTg`lWnxxiQy=nnbtmzmSs(4;9T&Ulq2A$MmaY-5VwZE$9d_rE9cnpy z^&dUf^_L=k^?S9te&RN*Ay(ejb(218v2QgpJ{n$~Zm}L+XF3-4k{PD&(|;bDlK=76 zGq>^C691VV*Pqh(5 z((PGoXZbrBW9pfezt^Jad2M6yEj4x-F8gxjeR(&no>kt}nH4u2Kk^Ic=V;m-&%8UoCRxUD;x#ntku(T{T-7BU!q? 
z9X+O(T{c&;m%dTs@qW1`F&Y)^ZekZUm-iF0l`xm(>SdL^2EsU_WeS+ZTgMW|$Wl{pA0o54r` zPyhoE0000$V1RH)I4+@;>C>nK6aWFHvQE5inuEv;002M$F#rHaNJszxYm+Wab-(Rj zP9!W)ZU%=>>(hu%pG6kr!8ptXC#%uLPtDb${vDU>^{BCgIRk(VM^>2cqIR{C4V?6~ zSW!Y&Lxc{Y%WgW@?Vqc1>O?9IT>rJG(cKY!oE^qU`G^=AE9UVCg8IccKjuB;O)KD! zAaMQ)&>JN!80oO~;NGo>$v=*hWCxHP!B$T2;Ii^4(WhdlT}ULVyWU%19BN=bNs|83 zf^7lR!6zV?*RiM;${-})!T?%&3T+0GVxl;4x=7gt_VCA9_i-%3h)LD6{{{GM zZ3?P)SCko?LQ}bl63kI{nJat!>byNpwNxbgM0Z4R?b?Wn;Hw zL)=v7th_>cu-J4eAOn!N6=D#0IrRUX&FVp7kNqWfSa?GmceGM44f9m@A+|W46DCcXBAzg&8HqX3HZi(ax>_(np zcpq6hBCrts|BV>KiPHy;gNuI&R7UWE5IAFK^g#V0A6sG!2Qhoi-;!Icorx88gCr|R;H4J%>+U}8dTO`x|O7tl6^dB!Ge@WKFqfh$1TdyH3#m}suq$-?0x58j zNHlF`0R!mA$$*;%ZEn76RnM0|-z2>}A@1*45itYx2upGq+_o72D6nJ>c3_R|rd~uN zg!8We7(Wx?10ySLKdzm!94;$2V-!Pz`CKiFSMo(>P?{+G)lthoZ8`qsNEDMt<~-uL zFQ+_;3lV#-;d0476L5#zJvd(zy`sjuLoyIGi(4U~Vh3GkBYa=QzJ2px`zWYtq`~r) z9J6U{TX&*<+>P%jz*wSIU_m1>qVtIw+j94i*Mj)GD5V9;DM)NL?)5K4MomfmrA(-Z zXnGMaw<7W|BSU46VBR(ssh}=MzwSF_w24+e(&qvO4z(&@k$JfHB|k!8D~s>MBpK}h z?!6aCswRZfzP6ckdQvoX!3L{=K&ua=fvBe-ZAY!eG$l3AUtlhvkM#8BfT!dzgVO~^ww{~m|?S!Fno-)Ox8#T6nCT= zjXExpPU#D06Bup;#MW`d?vw=tZDenOx_(BO))6b7>!Ur(Jnfe5I8^(LVnK}gCTrTg zPB)|EhyVwa0~73F>kdGwT4XeRfC4?@e;@@30oo+KaukbPRwziio z!+?!6tVZANM!4+^4SPZ~ZK^=jm}+1l79SgCgl+#+GN9zfr#yB+pt0SkME8`vU*EpC z(18?Q$=s|+TYfXu3Sxk$$qJp=_~DbuLuB6rNt}kLU8$q54icTjcX0h9D@!;@Qmta< zZoO+Rq&dEX%IdvP4eKhOSrqoezisHb*3psCr;RsJpr*Nia#>Ye^4R5@%v6vtls=1W zA|)OK1?8|!N%Bo&XS+;WMK`Onq8QLhM8jAn-f|U?IghJ}x1Ur6ff6SQ)iNAaQ995$ z#ce|xsu69-_gwU6_<3M|+;Z;0!;U2B%w&-N%pE$Vu&`HZ!ici71H9Vbpo2-IJ5$e) zaq3GEf6osoFMUpFBpCSspDHkgP82Ognou&1!S;x7FT$kHiG#hvQt3S^=yY%N!jVHb zjqt+zjTM)Kd=_w(@3Zj4oPdu#s&#W#9wyp~`YjPEE)4>Confn#gz&Gq0H3`grbuMB z(M?Ndv+oHGO>tS50|}>|=awZsr|Cx3oq}#8C%e}up_OWY$I_ZeTs*%u-v+rj#^*oMLH{cSN=H~F%5U8hPACYVpR@z@1I{}jx&|iDcFDe7$+eXB@ zST!f)5KFy@MwK{}&EwDwC$S5wcUHqf-#eL^-6@(pA*?Q)bf;neNTU|*d_uYvcYe!0 
z7AsCcy<>E}2XM%n+RlHd|A_J&+MqOgs9M^8XKM!YtS8Fo^xw1WS3qs}WIsirFn^E&7(zj|5$Nh_t68Ii;BYvLQeI{??--v45Z5>6}z!hqB`JP>of&`1;6D+214}C9Hbm7Zl~REa{~8A)kg+yAjB%YJbMy;)*aiJ%K&mU zW4LLW^T}~gYV%~BOUGBPZI$_@#0@yqhoK=fN-!50xh7|_t>8+Ce`*g+wx}>i8v2*% zIw{l>&=21p?1pmtQle@*zzItT+Bhp99wOV?PN{R8bAj%S&I?MaMFx|Y2B%jgvPt@K zh*i%6w<~?XS!JgOebe=1hn!;U4~&~B7IT@(p1*z~OC?!${nGtwe;Wfc4<$9{6ktk7 zh{K9VaEO%PZErGQ{#foO1(#=Bgg3%%_=yP}R4T_f0 z5dzxsQq&@@?ijY+{~|wZ#pURfJbdPvTjHIOc8X6=x%PJyz44dN@;Zg6Y17fPyRm8x zteyK_kU1JDUatATzPVuZ3CI{ou)UaswuFn^T2lbLUI2~;hsBZkGy4DLShg{v zL8Qq#qXmW}QcL~P?itd!d>J2zWhokAaQA&mgZ3Bq_clllNm7RRq! zSbEP2xf*j(5sNV=;l@jd>KE>8=CU~WnOzGF!=vtf#-9`qNoX?H->6hLJ;y(cU+3AVZLDG7|Au$(l z{@eR@Fv9yzJ$B;|G)y0$p)a{>y31YQKvD;JN`U0A9pYFo$Oq-=po4Ii@FrN=f8;nt z8>HMH?RCK^vr`c12fNdwW`S3}o@%?kITuRM;GF&E%6-F+D~s>@G}yKw3hmylq6r8Z%t#v98EC$ zDo^AGITq1Ltez}-O_+vO(w!6^2R8Lf^NJcQXsCoZ_4?Bz3k(h!;-UgWik1q~^9%*^ zb3+$Q5Gy_D%qTy}Si!@6wdyIr(G5_QAx9Jxo}B;~6GY7kDV-3n$<&*%Y9@dzCFo4d zDhM)Qr@EDKRr-a)1T>r?hSCyf?-i0(|KiJf48xBGU9rG^6k%IzUwsf$yLcnEi~x6p z(+c~1olWgA4)mwSVENye`e|OB_?OK65!s-~NU0oaq|np^1Z$93QLvC%3b7gdA2Jq4 zgf5Hm0X&~F#`gzj{FU$f76C1rN>xH?E-5aC5w?W7HZY%094y7MqdHIAXDQipU;LHh zqThlz_RMAc-baqK;Q{{{84Y>%-qdwd&?Qini;4so_m+4 zc^JP(;)t7=YJxP~wnH`#o!b>%DJGk{qphF#ZF=X)qkvFfjUHL3(lj>IBzY1^&AC3O zzoq``nQVk=FkyW9IPpy{%%Tp*mn6cy}n7K1nh3 zk3>PE;^e@d6NP2ErYe#R)thVU^&2SnD7}hWJ!;C^>^I*do$il+K}^aHcjmN7d0JL9 zlJvs&<1^(m)@{e?gWNE4KBTMeblMK-s~rTpc5M3h$F28iT|?>b6P>9S2G1poc3j`@ zR-Fhg;Ghm~r04{`_!{6I(#WO->75PCY`{Snh`5CMY{RBzDw97xZKfqAjGd|JTt4^E z&IpOK(3iX8?iYmeJ2VLJ-Fi5SyzF+96u*^<4{7`+=zggx)5&D~0sCr7W&S-c!zG%( zAE@3hDePE5UC4AAY)9{8(ZAC#zW$(aN{Z(cEt!Q7-zjhQQu)Aswg&Ncs}SYF0ua?- z^)6z~e}nd1DZ#4K>ha>Aa6-@6&!oB6MM43>h#S4#L$_1x7*Rb*G1TiFoysq%D1s}d z5r%b7l7$5+gfW-d33i7Su%XWkI?uA-d^1H$zMSIs+^EPu_^a18Q15N&^6y3T@>4e~ zuwWmjZ21q+2>yoxRsLB!CN&xCg#rKKX*j=Dk?v3Mp5|}ojs36`HN80Q5ITa-21Myv zFaH04`}_pp+ESX}0jGYCIwMYRwZH(>h5i};`dk{){@+LPYeEFgj6+8Tf!E!DAdg;_ zC`otXnkVNbXbvzjR{)5{bTj#i=NNXU!*C6cAVBbi4>kCWQMhU=VEFht3AkYXg;#wr 
zT`~Iwk3WS|0jp*+jlBygC7ZR}C{Y(F&||oZiCjfy3n^w~Y|P_mt7* zFOS3h{t$7eRG}$(NI{se@J%=VjO-502yWA`O3Yx)8Hk@_>>O-eSoin{Ww3R-aH!9*OS~J79#l zs$hoU#4KvTyRwtaw_r%C9ekLs{`fwUw|W+xl!%4}9UW zpm-iH`6Z;u`k~q#OrhLOXN+oD)7cuRJ^4QIV3bct)u_h=*KlQCi%ev<$A&vSBo5>k zV2uW+?p)|Isg9mUHKsgejzOj6 z6D?RdFMEn*Xov)d8@A<;i{9NH2~T4ms`$?06>SpI*eq?=o_pnhNg>X-(W$N zkcdaF3#xcYdP&RBR9%^ir6bs%C;1UA1A}d(p&*eUl=?T@$-9CMOJ&9q@{VO^SVl>H zZUr;Q$*!4hJ9~M6_~ag5tiS#ZVHbJP>(hKrB`=Gq#|J%$f3-5N$$eatFPMu>)C)VV z+1`s|*f0;=VkxerxeQdcTP8Q_@=^YlV4UCi5jr7)VBfqXa?^2R4yr2bZ$y`ip8=ZD z8|fzRbfoK2#2o_>+{%w&==i1kOe_#zG!~nwX4GkQ!K!ySNW?I3nj>dC^nU1KG(J&L z)`c2~*aY5(%J$ejPh?yz*g~k!js{J&w(}3ZjoB-0JfTZQs8`&}+V8$1Dgwq+k}bG? z0glRz#ZRl~T3-q425}Ev|BM!=3i0cmw&gT7V@3&AV8r(acG^cYO(Qon=vyaSumv9` z@00$m3YwRS%gsJu#65NQFEjaQAH&3DNgbp4#J|F!o0i#@QvU@Z&vL_&8a{KXOV`2t<^|5PgEp_zDguPST(V1M5vF zvV#^>9a1kfvBfQ592&PZ@lM^_)mk7$>{UQLmqt$>Cn456I#4|{HDBui9Ca~PncRb1 zFJ4?1eV#D)={JpII9y~BrD}#!qlp+_Tp$#SU3gdbFUgl~8vJ4?SU1_wFN_o$hMS+# zjn!VNyMucD13R`eoqw0iX0gtu9Q<`Skl8x@--piv;xZzS`Kk9;HbvmCp_f+Ed_7d_gVrY{$PNwpO^M8I+^7X>5HT1mJ;hryFwLkP zi@KwZ0F$IpXA`}nz97DFWRZPy&dc6#CAhVwbsPs?q=Nk@4>J(@IT_Vy73In0R5|O= zVRVyc(aX_T_n@P+<2*MdRcFKDcH4^sAY)70fQ;W^&Rd#T8Sd` z1OrtR11lVQ1(h^r53$NZky*oqdW*f=P_$Ht{G8_INR5CLp)sk(y{@G)M@xXzxnLC43VsR@4Z} ze^dJ$*sww$SPWi|dxZW-fd;AlS%H0CT0DEA#mW_b8EAeDM1s++oT{7jlqtR*inrI)n zmjM$m!OcO=mge-zpN|4^uWJI40H-v1wDHG{WrqY+tdAt9E_g2zN1l;M7>xS>a`=J6 zuG_z$;6fOa*eqqA3C=9FOBxGr(y~Yj#~+=$=m%hH1;B=kQ8=;Da&hNAlf*PeQ_7P} zk2KQ2OJAG>d-okp@2=} z?@^Z~5@;3#SIQ0j<~LlnX`U$cb-snojo>H?r^pBGrnf2EL;rCqj*G5Af<^6V6ee>d zkEu^NHMv?@bt_T!F$}Ff20H-uD zLSSzt%k*K~LFbo%{faeIJs67X?mIRaqGvET3(oYL)nTtqHHebDP-4seqe-1*-3_|3 zHB|p_pVgc`48gh|D0+SBP&5U#4ySa@dOgS!SNy|r-KA4|QCyQn824=d>>SiuT-X%$ zqbrMta(AN7IgKv65X=0{p=BbeUkE(9+^tLM>2iQbq)b%F{QRuD2|eChvNgCpoOhs{dy%Fe2JwrW}>e2Z?OCb>of}aujpBT3w|}d+^iet z|2#oTOUco><+q@YYC_i4D99F%`mt^|0dkJ|?hf6VYff>Q+3gg|G*)IXN*ph}_Y7-PH_ zHkuksTq!QYmEs$8hIc*ulIWBEiBJxNTk@Um`r|g-FzPKIBvg zaSq39!(S@spvo~iEjh$oEb>H@c*B8xXCAPx$so-=bfqp`GwMH?`m1h)1j^>o#=zdL 
z=D#j9TDjB!^)mlzU=z!2)6-s;2b~Y3R`iu=-y6rEHMUW9Q@(C8@}`JwZv(qIA*9TC zWf|9|$6f2gh-tWDDz6B4r8=xrIW{>_Vn02+ujCR+0maD#=3srZQd0Gl@h>KFO%L}H z?wThO6xs`=Xo7P0ZamSnm{6{dXd@qfGuDHAM@l)gmVRwQ59;_SrDI`tepx(4q$wyy zET6+!aG9XP;6FC`YyM5!4o}WvDR=z>%>(1laf-Cb)sA~Q<5V17usiGfD@{|uHM-;X z1ugkEN@HNZ6sL6I6Tprt4yVC66wN;JEx?%xrxfOZlh zM2~wYr<^+OMENhJDfENf^Ad++;KNmhKV;WOp?kTB8cDxd)D85w%w%p6@p0a%xw#)Z z%AZdnHu~x}e~Ft-xI~m!5kMWA6!lvs@rEFqMX2*Kjc@2VLYBL6>@sr=TJi0DHK+`3W>0@SNZziz*S3fi~# zo8e!OG47i-1&AEsq+HXw_ZW~nvJ9=?4CsSU8rPc!)cW71rL=B9_^aKtu2vk7>#3M# zFA4|AO=HqeOMC^LHUaFPfN(ma;e#;IAi4ue+4XWdCA%KPU98ikz=Rgq^=exB=+qq) z(LhO8Ex@>2=b`x*M9<}ItXUVyp}3uZ9g%jxNddB+B{qXRVLRqD=(6!}y3YG!|LgxH zzLensxA_5}Mc?}&2yMWTUyG9X0Dmw5{SUp)>~|S*ScoUVSUv1Ws-B|}{MXKr4B)Gr z(Z6}l6;Kz%R$qV&bKFh6u8l~Z834W2jMT!9pa5yAUiF)Vu`}CP>;6%}zzardlrPLD zd(G7zU1|R7yY5jcVW{9Rm1FF<-mb=GqqN)+z8iZD>W!p?kqTpnEj?^sFT^}L_hv@p zlouSp5labebw)w}h_M4cI(80Nc_EC!zEIk_83BWMddGd{=R#9yN9x9T{vp&j}L2B4rHk7U|nY=&YI(q(-P% zWMbH$LkyFW*x-ORD3dQMZ3ECaKu_HoA;h#;BVzb08_^zJW3xL;l^4$f7e4H+9}qN~ zoHxYNe@E{RNIqs`hJClquiXJP<$z%mq_h=U_^oskmbn@_VCRv<$M{1=)_W*bZdlr< z*pQvxjb}r#X3f?Dg&#p3EWFw@ zmwCu@{=>-g_MZ$%I}dx=h(h-<3NWl)N*kE5r^mdbAtvojx(5|PNk_kzM@-qG`jBaZ zH};YkZMprNSv$^wAg%?Xw7^L3!_a)@b_tX?QiBnu zpCHA9VPk6cKPITC%Z~H=bEXEG*6p&i5QEu`3=+Du`4jaSe2Iwy~E*Mga-SUi!jzevo>@$nqTP zaM@WUNmJuVb)q$WB>F3&WD|eacp!V#?&!z<06-U3;UL zk18jp+N3FA1Ppy;H|AW#?@om17|K326M(=#0xbPGn>C8&HJdeC`5nL3OMEG8jO`)n zWUzwCsN`~O7v^JBjr-k0YAlzRIPPH*UdmBu*E$C}$}CE%nBfj_DjD4Xe7AUm-*}r- zZ&R&%_@Xpcbc3nm91)wwc(f{|os?|LFhz>)Rs$(Q96LNCS4zcIca0ct zEZ|$Wpf}-1ZXAQX0GD_hySJbR>s=-p-fpwVbC()YJFNYurnP;%I9`}2$GesCY3hl$ z?9Y%BW{1&014QJ*k}#r7dbA!&7F&9ajLQ{3uwJ@W+Qw7zc_vbd*K9dtX5k^b_e3MT zS0O2NWHGa34#g=uwn>Mg2FC}HiN%?Y$XB@~OX_czOvT>F0gr_Ym*xZpCdA_r z9aWs{ulP^{XSJ4T^W{#0qqi1Epz+wMvoYd664!? 
z8_M$vUL%=Y922z^q^apJiQFX`I*{|b$E4~2NLtZBVEnr@GY?i74(YzaVix-1zuNkV z0bX|I>Qr$EvuQ)`TV7d@Rzq*)QkOza5X3oJJ`P^hIvl*hGaK09T^pY*- z?5L~}ViZBT)D$b$5cUDvC(KYY_C;Y}=1*@KY+LKk(@wZ|I)Z*^Xi}p%zM(@M#IAq z3C&i7^qnyn_=e2gbBWg;^=B{?E_>oga@w+Mlx%2qcBAo;dZ+uxW`2lWf?ijY$Yztv z))#dpdotP2mUBxhUgVj+W}hMr&Y^I=RJ=I&RKZHMMqQ#FlhyY40Dw{>^iuF(Dn+C$ zDj#u!|LB3;@Fp)sAwN-{_iA&D3i4$$G95vKBirlWY=S$Bk*3C#EdPuJ+FFNLnicoJ z52=-&ji--tz$q$smRPN`d~_#Fwua*eUY z)5tdlxu;@{nI|)Ic)C#k9#zTZZ|SInOH0#A@=jeM2~QEiZnn?|z-6w;9Wopemm;_T zMO_&ZxxYDxJy>q&UezTwVDvfkW`Owf&-yxz15PxI0P*@1AJe4)g+Abl<6KHAKIr78 z`ioCCUBBW=3_iqTY;_{O8;=}5K z6**n4UCpZ?jM)YDyAY5S{~Y2^Z^*v1KM}~Mf(;C*@8;~;M4DwdHd9k6R-qhr+J%on z`t-Ph>AR~7ic4i;)EWTd8rlb~A`y*Q9A%+S<`zjc$p{Fh7HSV&Y--HCfh?}cSV224 zZs08!WDYexv0tp;=zae8J1jHeFo-HB}!44itz4diY zCnsP{HvC{X11Q0r7GR3dfV9e@N&t;;#R0JWbr^-B2}^Ts!AfbCU!FA>nRt}k0sE>l zsE|%F+5N7^lI-l6=kl6m4H2ZhV zmLBUHd~Qbg5bFkLKDhm1KNNesN%Cx0DK>+Qy!;8xDyuzA{pzEA?h&l18T*Ea)cFj{M#cMx^rjpf|49|3Ozc8ZjCHlE9(01&a+DK{t%G=+=h%QIHVY1mzHb=y zqq4H&38X*WF_vHw9t+y77FmT2Ji1puy_YdSAl-d8CGJ@d45d@x?q#Be%nvrGHYJ7HU{ zW6tXnhSqeP1CjzPM6-J48a#~Kos`Oj+#fw7$?B+@P@Vs?yCnvG;g+>`>)ihM1gxV= za^UmdF+k<3g-R!`g7WL{}Dw>wyLlyPnyJBBpk z@%F6CKeX*BW_^Kf$$XITS)+?S$EDd+iG?=iMfJ)MWW!^n)B0yfr&(LC%%4~kPDB7g zLS5Lb5wbPE#6gfRE3uEj*;;}yEL7n`A$zw%PIH6Qzs@L z9A>Q)GDlcihB-Tkymi|>x;F)gao?4rtuWp8Tmp@!h#;w`Y+i(dl4BEI+emYZ3xerc zRCp7@uEYu7CqWP%F`q?^pbU&7_UhIUygU3NP!In9QJ#j@7Ky>=m8u9%ruuz#R>GyM zpCCi>o$)E~n^ekU;z-g0C|qzhkI$Z4sHZscmNI2GZZ4i=X++`oI|~=i?+{a|j?fDP z9h8D4a-2D6B87-$ze3VR@yM&)ZOO(+BpSP1XLBvhbnF1iWI5F&rff{{onx$mVTj7g zX(aRa>$kg_q7#nC+3(yeVvcoJGgjWhM6y>h#T_Nfjb&d!W!QqbF7zzcg?;JG779e$ zrHvlEBreaIKj2Xh9y0%VfD$}H86)OUGMcM`qCs9ZJ!P`E*}H^E4g>`+P}h4xIPoV< z5(aQ26GVGLo?&O;wp`F{jt3Qy;pM0F9Sm#h+Xcgdj+OU(m9dc*uZ08KYN!M4aNo~I zY$Al9o)G*kgE(q`$bJ+sg}}cX;ga?iSK}{>a9Z`rg~i7*-=_lSNFP>d+9ixa_#xKC z1v;;R|%P9g!B zqSs7Jq2e?0P-&fbhr-Y=?KV7mA<|x6U1JI2&6uZkLD%-5PDm)w=ey*$42DGaTCB{E zy|4~);BH(lSE(h##ns`NJ>V&?+r*{XKJ-=-c~@}R0+}C+jBC?t5xtj)HbShQV{O0y 
z;I~HbtQaQpK!I+JMj2o~udFaq1^CkFy@TDHyPZ6jI1XMaMFH(D{umfB!tgnVXs~Hv z^pXdU2xXD^v5yjCC2>bC5?5KMNMUiP6~?(?%uoAr>9T zU^qZElD%q$=Itf;Q44ZE874F=+Nc4*%au|S3@d(sBFm=B;eK@k3Z22~m})?u``$>K z>U7?5l7jlO8nxhNKi<6aB;r1hb5x&ta57x8YPc3EWK@Qwy*(m)GH*sH*qW$zU)W!X z0Jplzpcwd&)gt?rnebhd`q7hV@Oc9Ol@mE&AsK87pAZVW_lMeAvBb$kO?2Y&(HP1* z>b5p5UM=akCjl zq3~YM)+dIq@Z0;zmtut`3Y`?Ah&`Vk7d3}E{<-8U!-{);WbYf9D+ehu@TCgDx^-?J z|D(eDZGAji?Y@r;1Mj-hamB9ENx*%o)79yph;;Vs53@>aFvxM+!}?i|)k1hWN$EM< z+L4>w;sLq{bVf5JX#nQ2ENJHvr@)`I3SYwV^jCWbamqi!qLRe$Ui&^QQ2Bc5&0xj* z8vn})!@LouRtUVLTP#O4o_t@u45a(dbq@rWZ&pNQ&3cq&9v<(gP!3-mXjJ|pFQR4( z&XcacLymcT8%3U;wI}0hay*An93cuXEcGruKWdO$*aXu`u^+fg${AnuDh90z_wgIvwa08QW-|fNavne>fiz%|tqdw0mQSG2f z_ET5eTw*yZvr;#YdRBZ+RN!8pDC($Nwb!eM8F}4X#n~CAhKcbZCK|UDm-81f%7FAG zNlz+wD>|`PY-0A|4Q(I&eEMlIXmEvz1#1>u_bGJ*ZfuQ3uGsXe)3;`Opg~)%L-3Ji zZ62-q7lDmA>Taix=fQq9^~~?|KzN;?pW*3f3?LynC12=A&E=mR<4F7>(5c_)Z3~;r zs{Ve#ZF4{IU?0KV?WTy|=AW-JT0&S)Ns36b%A6ojO43anUkQU|pyBvRN{aTmVi15# zO1+B0+Yu6r@U{Y^&pOCA06&vJ)29q<)8}wTcibssEbkaazikyvETzhBrQJa9ZN*@h zZDgtDICZapNksrnK(fCEI6bB5cJ@@Wgd4x5Wt%hZ*DDhy!=m{y{c8$8`bn^~j2nB! 
zc}7Wu@X2H?u+pgn`idCU2ntSbK+N9&!rTN4Ak zO2H*zT7*>2+0nl?5^Ro?k~=#qt+J$oJtPUo7jbY@DKG1k4 zWeRgCaYmov)DZEho$6_h=5BG7;%7BHQ>G{3LeWzZ3<0X^qvbsVSIKAO9?57ht=?4J zTJ#lveTXuC&daCg%8Wj^RXk8rwrocyc39nI%U_OiT|MaeKM*JTXapV?OQ~B@PT6tF zt8plwSa(zp{tvOPa8TzBV?oz*j=M-#WIF<>Hsc>sI zOd^0|&)Qh@0_#$`)y~BM=s6o6d&POr-TY`IhzKEvP;rY$76{}gnR2CoI$AXUpob*+ zfz`NHB%Gq*;pvA6EUnoz?1TA{lvE2F&?b&)?{9B89<#y?llrRg&Ny%XV%$l^c3x9q zGv_&^(ip*p%OXyOlP8;U`MD41y?qi;UFpq0hkunvxv?3Z)H0FI=~1+Xg1r-T%UMQo-6aj05p~!I*|3dPAysg}7+U4l#-|w=-&87JK=p_1KC?uuW z;R-b^)DGsjN7wH-sta$(jB5Hlp7ZjFoUQK#pLmeHm zTPS85Uf)JT2-J9K{|k()TLTuTT9*1XF;HWl7y?`wVlnefF%d)n4||^H3Yj`7C_D+6 zUwA-Fmta%VV2_pngQl_4jK4M@9N0^qQFni`B4Y`475~y)Bp<25g6Hpp*{rcK$`YTM z(h%xUz_iRRQcU~;w^3}s- zAtG5Rwi~&75PsoiVGfg6fjrF(*k5!auR9&G-z#0P=d1~fu)hwK22$CxIs+;=OU z(&5u^fF|($_C!N@;ge&ac2DwoH$|0*RXfi$&dJ%={m&4~xYhaNdEb8-&9rb^A~wvX zb&Phu-3h}IU}M4;sM~TSytJFzL&0Qmgwc$6#g@Wl{>)$%I9xV7@WIp$MSGH%;M#G5uVdT8ZJ2=Q{>M1h2Qk{RuC+>8XMPpeh%wc*7u45&bxTtPB^N3Qwm zxlfmZ&VIJ=VUu6*8AdT2r@|pNYhJW$zx>>y6HsWU&@i}(-*+tm0 z19$!XFj6-j3OAggX?P)Y35V*$h@jQz@EJzXMCZ1g6GWUfe+adzR3@I`#Q{_zBg|f5 z{T5@N8^nrq=<|6iB^M3)q!bGF@G_E7DS zFM#`H&VQwsaN_`m3fTTg?cnD?EG2AqB#R2@);U5g+h+azf3TuyC50QWNM9v{X(%{6 zZ?I?C2>;DPJRzm`SUC1BUlDv9^CCTEkPz&ZHeDY)R|u&_oQQFYSkYPS?Y}jmEm~`#2_iNtrfh5$2;}F0h4s%c? 
z=gwjSA%t`L{+dC{$xiEp#cL^3ZryR=hFEbP4hZ9L#0(`W?x`TJJ8W-LQQ0l%XTBVb z^q(@qOxmzjT3=JiTkWYydffQPdeX`V`72+q>6;v)8^zH!0(A@V3s)6gzI#GP?}wcI-zre1A^*E<_M>n^>DVC_}(R zb+CvReo% zmk9NXQquFdig=owY=x%LVr4`Z6(f+APuciq5AgA9@F7(FO;==!t&W8a0A<_tWWzv3 zlsj0GcO7 z_VVv19)W5R39SPG%18{UNIX|wUEku@?h!N;@c7#s#;WJIyMplwT z;Zk^?eT`G~O%fE7pB$Tv%oIklh;4%;co2Kgx>jvWe$|9t3zWwNvi!6Zd`V@lF+N{X z?61@e?L1cFbhik`?tc+jH41Ald0X7iOdX4)zL&|Jb3Rbc;h);e^4J<1B?9qPgy^rQ z@Bgao90%&U2p$uELu$lHbAt07L_Im^UL9jvp$O~THHwv4ZRE1`9 zb6F8q&MZeiU7EpGO^I`+l`45-^!aLecD9;&HX;UJsqPLESI|B@Bqv;9MU7qQN+uRd zhxO@V^9kL#Z%*Hd9tu5IPq^nq)O=kVG(f8Eq_Lf9yd`Y1QE`f$h1ffBDcHON9UD9t zoDU3c^byq|oH|Sy&vCc|1S*bnu4JV4KbAjib%C1Q9{>JutE)mcSm(OWMOn4hQKLQL zWXwMbaaZ~K2NURQl<83NR9_H6XYv7*54tCFSm(!1qcVj6CA+*w8xeFTUx&d3;HG%zl3d*W>Y7zRClh^4|9lVS{jL~L% z75o_^^`_STypxA|@Fk0?*iMH^{qJ=N5IfkJ?7@2grhL;U0hPP}t7|Zt5wz^cco<-nRJh%{FI|( zsZ$PM5Ih&L7WD@AcWim#UBI_)8YJv~C(snb(GuYuaSEFMUSSP*`g8!e<69nqNSfGt zo(-eYrUY9Z%(GPG`r!diCY00g*7DW-CNrcApkJ5C6li)Rfk=P7c(1e*pIK38uCscj z=@-2N#Fz9%NZBO_g<>vF=gU5N%y<0C1+IXjd6(N!VSp7DEP~m`mZzY!jHK92B#PBg z{i*)NSI#40koeYif%%b??sf=^}Mwhy1PJ{R|r=e;%?tb`Siewol zSU~UEXb1(ArK#swlDRg|6#|Xf0uBt9KT}&c=#O;3Pdp~ zqtwIokD4LC5nMMo?+*a1@b{=3Cb{5R=V$W+x96G*@`VEB=9nH?tXVDnU zRLyt7nCv!F$CMq60ar5K+KZM11aSWw{sG1(Jk) z#T!g7$;>_cXyqbJ$^mefOR>DhBDJ^>hU*>hNlrrP_z$OciHY1=e;X5OPCK*mi^#1w zLIr4dY36plN&vSYVZJQz6M~NkDd`$byHT!EwV=!a60TTSol5wVy&y-_oTH^?RcE zO`2e1rZg>?!BN|3ue~~m)Qb2QjNo(*n=~;|RWut&K!YSi8s7SYI=WrLM`u^vr3qbo zB6v9TO$Sz5GJdO1g<@$#BT|)__JFOKM`;zO#HFae(+j+NwSK&c1vrA>T{8kws)VRHDj9N2KfQSyARI5~?L+`4+fOj7Puz-lR3QyUL zD2#}P-4|LMiSHxU&5U*mr@6gO62D_46SiUE|1Qyq&OM68V?t01j$cf4U+pJiJ#G!9|uMR!#G;%W53{!Nv|H zK6}|?u^Ui>tVGasoZG$!5_nR7*rvx`{I?Or*JkC8*CG2qrL(Flf+riPwrXhe;wCwH zrZ6ruVW;!!5q(1*#XWtbR!^y!e=t0j0mu1W;Ai2nfv89N$sNwBY7zz*uZeP>-OtP97jGxfo z5JsCxp*l)mz9uYh1}rn9k7TSK7UKf2^C+MMVNP2HSNyM%G_(co=W@tdNCaI*0M2g} zBn8DFNL1k_$CB2oc_#!brXv-gzkvq~MlAEqPzdnpvP_PeH?oc1i!Dsj*BEeFDGa5= zIe=@Uxsr*7)m9Vv71u+zj{@j%&i@h}prL2vd~{uC?Xp9ZOfg6yXmJr8H)XND^!D?L 
zX4@VXkk!tC>}Qg@wKIT17R!1GcSj6CJD%s;WYv|$sVFw%HT+=&L9{OdD5J(lt-u?( zSebipjaP(bMmW+9F3E#Ni<%xQYFVUa=!3A-T0#+v{+HJTwfJPn#FeR`x{GeLHR_kh zlW%2>ftiitf{caB^(8-wAz8~Rc~xl%pGz?AT#_>YcmJycOkrElD|Yx^uo_Q6RQd-0D7&o&lCfM#+eg7zNE+Yh`|A>iLn zn(_c>h7qF+`bH-dMwC}vXt`v^(hw2b zsET!KD^(D5v0PK`x0%|2y}0lfe|WCL@19s9wHELaE0IU&<&KpXJ}?qg9NX~5|6rzy zy!%U2{FPUj7oo9iwul3+!t3$Ry_~);Dy)Hb-M~wBVUUBuB)gbPQ#Srhz3NDlA|F+h zB5-ivk`P704T|1Bs{;VHt7(J5l3OQS_(>XP89z+|dE-60#Y^{}%XIxwJ1XvZ;1o0) zFwlUvUx)Bk-ZQ(E@JMR-_HI*HGw@O~l0$zgm5AECnF(4i-J+8PVy8Jkrbm*gAnk#}fpvP~1I#jsJ9BunSEtJL^8~RBDVf=BBRXFd!!{uDHu=<(7%CT2l5Z3WevD^qgD^w`{Kn zyQYO*e;OqWOeDt6QKk3wj|`yDti-M3$i=yInv5jSfPe!DfU%i)LkKs!n5N(g+aD3UC?~RCw zfaEqH-e}Bt@1rJYmC*IvU4ko(GE$AJ1I(5Q7ZpIT-LUE9NY_0r<=&SsgX5sZzn5N{ z8q<|;X8Wo~Gf3%1tzt)<#4>R~*-FEdbw~Cc3yEgLJ%#Z~h%axHj)W&yagQF=4dQQ| zCO>pen+fT&mqr~4%#Np2?56}G7#smwX+ZInENj%xI^Z&pfK6pLhDxF>Qd8xG=B_pHrUpb<%T#NI4AQ$dmmP4%+I2U> zT7F;S?t33)&bFRk>fg$bMamCM>BTbih*N7tHFO1g|g8I7}FXT0&^J{9C@TV zn917_JF&}VF875qkBy+DH4d}Ta1eG`tM<|e31Q=E_pOnR%w(3iui!g43FuseXhA<@OL;%B`g8*&enQ~N;+XoMA4 z9m5n}6md%MA-_Qc?7oO1h-|!hqPCY7mk%cPYJy*&FZlA!Q5+`4QkMl;a`J&^6Yev_ z%vlKtL*rF+$Vt6foc<~Pl@#efbg1k6#U1pXP9iIPC$Jdyj-ZbYe&4Sx(xAb@c7Mb0 z6BQp*V7({cF3uT@OFm#jW6BE8gU!+^Gd>v(X8=>5yCsMqFNBm5#i~m&mN3|*9`S;+ z2Gtb;EDyZ-?%|sM7Jji9fQhd8JL#M23ssiYO`T&|8w>wAQ3hYXpt`@L?*10+O$u9J zXDW@HWnScmjc|cERiYr!rE^Ce9|?0J%csWKmv^8T!0}`|Lt`o*Gh6scT(j5GiF#I! 
z0Tdz`6^~}FnI>`O>$YtI%pE2*wzn7wC^;K-rsBG@-7H!v*C^y`070tzRbzu)(4Fn& z8Dt%X+ZFjDO@7v!>Xs5DoGp*Bt~SX&Gnvhos?4*P@6q#}nnv zLTDwjWzrFR$~F>w3eA%wxh@0=TbkZOy1~w>`GUYu8bfz>LR7MT3+a!C$f`CLxh1SA z1yraCj0D+hi@Yd@c7c{1tA77 z!*>qqDHd}ELt)5J+2zCJj>;D*(m2($@nxKH9mpFBQh%Qlt zhHs#PnF>1B*wqCb9isp?wTe%u6c1$){rDjyI}uF7;SMNSJY)^A1U}eGa~nU0J`@EK zS%3rRmr4Uw-N$d)KDtOA~5#(oRf<5 zBwN$a28girru!gNHWLmUaGx+vKC<9iCL%JP5p&|KmZrc>#3~iLoeon`LtocNpqM5c zAlx+LL;7Q|-NUwSiC;??u#dM={&Ly9JUSr%5Jx4LB@Z_v;^FNVs4SXyWK(APrN@jN z5pN|$I0oW-LXC2dGYav1aCHu*1C?6QSR;z-vs*a_g*^>wK>jN2)Y@+BtfdW`m{Fz% zh?2f+bwSLwBKdxg>adG}6z)%TBQxt;?2DO6v@v8{f!PS+;s- zu$MfJa(a&OFg6`$bgYc`kVFZMKmP>)q0(e7R=*Cw@}lkJpOanz4pG~#3?RX69Ca#(u^+h1d8^7;op7}Fvm!Ap6?;G>(?n(_I~aM zu|yg*SnF;COcZlp_o{i)MpikL2d#ArYf9j(#f*66x%kms5EaMGFy3|dF!hpz-`AM# zD~m#^KT&(xyrd<2Epz!c-Q!xB+8?t^*=E+DSUmpc@Z^>?;7C5=No=$@Bud}|Xw3u|?jh?hFQ^Z_P%FZr zTPUZ#kk=Zy2#nzcS$ppkA%PSBKXAXv;t~2aLZQ@xSz+ud`i@b*XHfn2Ksx3#l3o$6 z03PX;G3Hr>{F0(@@KnPd@dFY@5)}TLv4C5%M4BS^c<W_z(S3EOw zTaw4qPzeav#elWSGTHWC`L z94X9l)MSd5<;`hj@1kRtOQ<-Jm7_RnQ%z1CT&(5jtYwZ)(>fK!aE zT|1{NQQwBV8|%?0A*)xD{ci&ES`o35x28WXdgUpnGg2$r$mj*}3-Z^!Nef4Xx{efK zFBlpmD=aAa_Y2W6-BNp(!uCq$NuuhrnMNH6IL!)URB>-C3atdlsfq*lPXx|(g%Rci z@b@t^C)ISVa^3g1o^*c{N;k3{-DwXz^P|?aJBY!G$H)DnIxWgb={R6QymyXUacrNa zSs-DT;HU$yXgCu$D`;Vni1r_6_{<>`)o7ZBj2{064L8XMEqlMFLo;#1(r-ZRh`~^l zpE{wVCp=CD0hH0pYdA@ku!!}nU4b6L6Pd^_Nd4mLf=xT!O!}!qIaca1C$0B&wMXPt z@aC0Y|L1 zW&slAzz)U}RVcpK((H-?hYRVxqI~*9H3R$n^BIoVPNov!2<27yr-F}cFo;tT zuCbUy=Kke6I@m4oFO!W|8jlnfrtyDW-)PXpQj@Csv+3ql#EUqZkRHKHX+{t|8UX2f zh(I2&K2g#eo2oTSiE3@I&--PH=5)Yw%|KI=MXPk@tHJGv>BIGQjH=Woflfr{(q~6L z4ngDMUC8W&NlKY9abpY;+MvGn{09npiy}bMxgG0~`YMOeBY-x-@6i!$-^Ia5r5jUX zC)S+;c`cW2qEtfg8Ao-W%m>`cUxj{ZjXTierc*0JQIDZ4utF&y!jdL^@D6ohRp@(o z+bzD>DT$V@3N;%S&b$h=Yy#OtAO6}~+VXik-HRj`Iv5~l_5CJMM9sld;W6Sl#{PWy zF64@!r7LO@;}(dgBL};iT!ks=7A?9u`X6Gi*Vq|vO|`V%YEgy~m*YK^=$4OirMB&rb&>o;!PA;DT3cRL z<{6wCg?S+mq@3l9P=cp>t9mzu>L}h5CO&{IZ;UFRr5N!y(jgUiLe#hJ 
z(}}LjhpLK;=}b_SnDpz2oom;j6QDL?N6{>&?Pyc_@G{{Wj2_$+7=?6}sdPuifVf z^F9G=*(HAm1pWz>l2BG51J7;7r79Mz{lCaO8)=7jnpRg8_B+KVLC^hL31*Hxf{{B( zM#r|4z4TcJL#>y~qH6wvr=kE2dkn0Wv7$te{PZ@a!O55u7Fy(TOIiT{L7i46WS+LN z%vwi^6^S+zF=ww!EU!0*{mW+6BZ5O_ISNK6(O$s^S2!W32_2^ucX(5oeb@Qv!JLRG z6sxMik4ZV|ScrcBx@Y#^r}QMDsi7_C4qF zg;aVP0-9qpXQGCbaJD?RbUrYq%8Mx_%2dBTL5zeb@Gd8od;>6T6B@f?DLiEMA*sm8P#9n)*_66EY?|PRAyvsgE=cR|Ay9 zZa1Tlc>q-{OHL+>Ev=?V{M&_-CgIct)3%)09r}nAAK8QTlb7N(2!#YSp4_QOAB2Qc z=_~m(S;J>XqbU^~_3nrwNe;D~ZH-u&8kg^o9_z#7nxC6R(iM3c|4 zRQDjCl>E6@Tb~d;d_hgDjLuR7uaOi(t_+Xy7gzC}k$?$i%;gRq@_eD&!=uzI7RY%e z`OvRrMhY?4S>L|+wcHyZhHJE@M>#;uAnkJ3J9Z~3_+yE2x4v%)6xXKDMNfAk&{jec zH1(@8fel?$w_EgXh|$EqA~0n-Ob7)F3H=haGZcD;o3&NN$_j-WI8g&8xr<|<_MO18 ze~G)PO0c9X<(qR(r*J4tECmqH3xQ6oRo3y>Vd`E3m(@eQj5L{9C7ljok(lq;1s+Zf5n8a)o z(-w`lr$>QWY1i{z;*b57CbO~poDa4zd}L;}33#sB;Fa2mK5~4`mz%vW2`PCg-5CP=G1?+VWTg6RF5odLVCqQ~|4jCT>^tdI^2Q{Syb8n2WDr zp}j2~^niH%G{qy92)lFP3o%byxw7DDcwlZoY+AGLkK{L>*hLRMmJXHzK9+) zJztve!VU~;Og2;}GZJZY5y%+L!iLNg&$SSjw-t}e{<`vwEUFFW|! z;S{3Zz3N*o>;(zDXjlz>kPo}shd=qC#x|c&-xt2y9__4|=L(%DB9W6^vdGhyAlKNz zqcE+l$-S8s| z#3N}>jV3Xw`YhJ#^KjKaHlm1N$T*e|C@0>$dllmF2taW@D&#CL4T;+z}Ar zNVFq{9TX0gTpScgf<95y&`}sv(eVex@?uJ+U34iMjT12|`GA1-GiH*2>o`4H3myk? 
zG6z6;w7SU2x&ubVJ?eWryyOUrFu(Vb-$ik^EH{;*Ft1`WRv(>$!My|x%0P;%eNFOvnq7+{q^h<}FrSHFTox zjQw-$mf$#?+kWyP(S^rRpYJCMCdfVeOY8-^^~y8)|HUqRXp%1)p(ltMM+&%QbuJ9T-dTs1%) z0WuBK2|spYxl@I99;(G;ZUKwOafU9#72@a6Q$`lYWY)kF5SM;<;yXR8-FUv7p)j?s zHax2eGMX(t-ZJ(vB&7W0&{g_o?K%8bG^PUVP4L)6YjY?7herw2sOfRMfIK0+3Xy6q z76rj`URlw^pg%abhUuKStm37E_i$3%$|N=b8Xq6}gL@a`3B?tPHFjsJ8FPfgHDe|O zK#7$%6_+4$2P~!MJ;$2$X^B&^T@5kw2cADreYR-8;4@`CFz=OVfUgX*uob`biIB+Z z{f6G{vU3M(buaMp!nq!tdy4~=Fd5TKNj6d(An&)P3M@u-EQ#v5!084y*yP^4UvOEM z1CNI(EmMXQijB=@uWu9!lqYA-in9eZGkVzbJbkh1plOkF0UMIN+cWz+DAUf!wu1e% z_Dff%H|syxnH#@i8w#Q44^qQq7#)lOGX~c8kQLHPa8T?{2}fDM6^gRinQ?a2ftnH?+yU{+I7kOaXh z3Eusyv=XVMmZ6w^gy~YBb19!>F@wyi^M7BYXbe5!Iw+@-|48V~DXwZmeGBQm7G*zo$-T9FEnx zMt=4R6yP!{g;)yQ@ny_V?dx-lHRJz=VWJOkhSUuYl|&mIw<{dHKR_QM5`ncdMEr2j z(SmbT7+XBhW`7rxL>3JN2_3U?XpRLY>Fv^OJTO>H=I@mJ;3WPe_@KpUwn{QfbFv=0 zmN3$#Jiyhuw4_||Hsgfoa^3Y;K(s)o>M%Z9hkbbC{KE+Z2pP-fer8@?4!U_GSqu7l zP3pY5iE)c{ppWT*Lg!TwsReF1z%URkZgH}*2fXwct-MQ#4pQLTQ?5Uh(*{$b2Q%rC zoTp=Nj`96`W4IHM4sQv`x@y*g^7c74alq<>x*aUIb)XA#0JRud=bjp#wj-X5SB#9+ zl!(aWoEwv-{DRPjbapa#sRFl>$%pJo(&#v)#}=lx5!d3?Q#@JW?wk(ZXs~gXm2#RH zWzjO65&{1mNJ=rFSKxp%BE*XjGQDt%>YE}hi$r@rhM`#Jh66O(%f4-Gk*oXPfJC=K zCK!&YQ`s}f&GCn+8qe`925zV^V0>5;k0CFmd`qZ79YmLXsY`~H(Q0@F!94Zn4D!rq zQF2yMj)oz=15R%=1uY02c#zcF(;?~Y!+yf=c`&$vf+FAKj^#nwCOk|^hb;`gsjm!d zGTgY+a46#Ajc^H!q6OrJ<4~FrH?GI-qZZfj7=I`xX}v4CCay?3Ea_glyMuqO_(4B5 z>{57dv}MFL?4Y&U)g9`<6pocF@h0rmT4LQthb)t&|FFyU^+%A3WbL%+2l@YNUh*q{36u};|jep z(EfuoxP$0pv4OGO8?ZdK|Mpv`mX$Q5)HqVRP~NfHuKkeuGLH8n(;n#6t-xYClHvKE zD)$x#qE9r)DvIyTpA>!dRUrjA<^{G6q2_+GRO8=AoZ0K}TJhi6(h?VeUKZMqIMFzr z6eX-Wt{j*CluXi$_tZNwP>Yr{g1J8=>cg{{aFbXwYT`uY09M*xQ=nu5E0Dk$AeMsu zdj%r*Bb_G14%hPcsWDFwLBU`{O9#S9g^q;~vl`8?ZhmqWEW5^*EdwyR2ZnF&Q(u!}Lq;_8FY=mnul{-bTN_H0#;ZG?cMvTf@aD)w{+T8C*`h|| zzaFW+de6cS8tk0^CT((Ei8YU8lS~WBhFhm* zlOcF`thu}dqoa2*D039LoJmYY4xu;ymNG>S-HGaA^5M=21?Kq}!$1xgzzao4C7N~O zO_B49i&cp3${;I#@MrO_Gg}tIEvd?9 z7&;)Mmb?`wW1G2(xI{F=tE^CLQ?_g8~q6yd8#Ijx+2+uVG?;?1W 
z_fnKO79#u(#}rc@LzA`|T}5_@o&*q_8|N)NzV3|rK7=9yrajSVIJIFlCrlW7Qv?VJh1D<;P3%by3P9UHM>S|AsIIa})Zpx}#0^rN{L6)GEpW zBj1G70=BHbx6VOM)^Cl-LZdy5I!zSfPv=>}g@@_pR*HXf{(&$Ugg87e@W1&7MeW#0 zkO4M!o9?jjIZaMCR4FM2p?DCwv&8Q)4cz`9Iei1k34xwAcUFEk#jFrPkVA|Z@JA>< zIw4(2qWT<6742lISp{PhKw&)H6t@d}2`2?ee{sMAf?&XDW0EKVlPcZl@^U5U_6%NT`A zsv(x0#>L=)VMFU9h3nl_Y5rBLlXa~FAH?4c%&dvT4^x(Ntw;?zTmk??m#n+ug>X@b z24_4K07&%aJ`Eh@?(`=&;60Uj0aXOLKPc)DVsE?a0Cz`te^_u5x8S*5g2T{eG|Nb0 zC?48juabXc4SqT+cBz5D3Mg zn1YE4#sxBnl@2^WX6(bH2>owdQ759}9<{hzy~XrqFOBj2U>!{p5n&WpTTz&?3D|Ud5p<|m(hE2p($Z@y)NK#1Vgg>TlX)nx%IYW246oW#Xenwd zP~y%>`m$)v@{=>UX#%>ag&FQ7tKzhcl_OpJ#il<(Jh5`T;WO&W6@DrI)lY;FYz4~8 z$W4!dtOm-eNg=a)dtrRgcQ|ytEPZ^?#xm(w8MQH>_g{#EKX=LPOI2PSosgqF z=}KBYBu=_d-D7rr@)NT%?dC<(WFqG}Y;qZ-gQ=ktSa^^2T{(3?dF?@K(p3A80GrOM zx1R9I?vIn>{!tv0*VpBl*hF$$;$&bIJi?QRj~9hCax&zY#S_|LeMJ64YW0(an*)k- z1wFcqHT=^2jJ~IirqVDyF;ZJVg^31bV0<0Us`Q_ra77pN0l~7bSiBg{MYNce_d(6G z9)iuXUeJjdj=@~p%zPrU_7sODAPx8qIkC_5jgCdkT%3Ng$F~fMtBVo=1}{*$Ifc5j z!Q5!{KioU^8ESK+l< zrMguDJ3JGu{cFUGv;;gG@5_X078<7oZ5og~7-c-b56%i&U*Uay+XN<&VAn)U^(&5w z&G=c$x2y(v^i)~H zlkX0s(3PL0%-tdp=2nC}`*m7H2ldO#Xe(uyU?luJlK`8*Rj=0(MmnYr3&4$S;bR-8 z5{pCs81RGU3rG;A%q8WRlhlUd{*WI>DE;)>0CTPoe9-d0B324C)Ef$3+2*BO{9l)y?Oek!nWC+W2(E4#?T_m3uKT}b&RS>VYmCVZc&0A=BgkAG^Jo1}kIaI{Sp=PuZMmX^F25 za*@oAJ(VCYKvdgCX9f(myyxzpBQ9g)T-s7<|Jn;qY=d(ljU@M$t<8V5KCsDtSgub) z9e}o1JwQeVCqHFFUsx1Am4DN4g%Q1Rh8)TxvyR67N3j>hzd7P6eF6bO&ftv>qZD>T zy#bByD6uCPAjjp4QjPa#@G}Rb+WJ&EhIFC6s0!*_%u{;2l&`)d+4jL!0Skl`N;b-# zqb4C2)y2?)TbW|EB}I$`@;WtOE}{M`8AE_V)7EiHC5LKsVlGkNXIG7*DR0w?!Hk$g zJ;j9 zuN3f#6sRvVgl1WXEN$2Tx8`g|IvaPSTyG)u)#DD17SqHRq^ANgP4kl`nz2 z0cRnpj6gG~Wyi!EShE7zn@_?t&<{?cjYYpzABLS{v1h^P#jyAo-Xn>YD)}IdGv(=u z=%-fSSYy>KaF%qtqv~a**6)9n{e2RmIk~J$ChLW2mJVTIj7Lxj^XO>CMNzFK{aHoi zzjjaUaB2X|1j5IO*Hd*X00d#)$-)D*vx?ek0@{lKMW}J*(x1J6b-y&%6wddX|5PQ# z^yA!`JvMH@oJ)zO$iD165F0JJezn+!gxA&jiV}q?rnM$V0$`xB9Xz>Yj54D3vMUz= z_H|}AZ&2~6+!dct)G4osW5|u{Rhvr?r{%^|!%({mw)nyPCvEy!aaZ^K}yk*vc)WtI6slcUg%-*@}fzlB_7e& 
zpR0wir|{-M;;nEoSxzVA`tFIBQSh&bST8FQw>n;dO;JEV(ZNzwHU!L|R8L|g>cT~8 zR8+g3pe=V1kUB?2H}Vod|#rDB7HCoaD-D-(dfA?l51>dQRl<0DSp% zYbOs-!Be8n5UP-Ti*(|pp2~d~5@~C>4@|c411I-TzugK78eDjzOumOw$2md@rIpwt zem;!>g#fsVBtQDzna=PoI#9X4_CUlW>Bm!ZK#oPm@YkrUfVZ}h(McY>tZm+%hd#hs z)2H!QB7kyc2DBXB9>&#veZVUtkbhJf;R!I+ z>lF0*41#e-Y7U^{`U3I3Kwv_CR;V@4-QYEv1I>VY$OadSY#S6;yqS=-_6g8R;)Btx zU2g5nbBJ$#%!7H$N$V=5!}WJQz)3fgM+lDw=|l{iC^o>Tzs?EoB(Do`fhLOg+hj1v z4eIeCd_#PVQ^;M7Lxgl@-d!XVbAWKCqd1f~f|HPN5aRQ{={paQCxeQmhut-GuTgm4 z{u}d>QDCL{&SX!?5JL+ph&3AUKE8c%+`*F&E0(#2lNB9sFGJZw-M+e80D!g2e$JH) z0jFvTmeR_1h#7!9rUkVFIC$$?3ulNrnTlZN_RDPihG+9)U1*v6RL@nn)y1Hqs$}-7 zzXoXzIm?Sj7_8)O>&|PBs2u4OzCLCPN{YfqQb2G~Rdl=SBeAm3lDMl+h|%bSTt&C| zVK0zO%K(F$m@I<8RA=e5^G&M1KvXr6MwaD;$*J~Lzt=6&-hhXC(uj?A#c#@U?f(}B z*=s@+HO5JT{23TX)J3zSi8~pGuLz)03$SE>4Hl0STfH9#kY#y55Hrij&VJwnDs^2Q z_t1g@Yra7uw4Ey+qu1=4!Lks7oNT?2Mtmlx188OFz{>Y#WesR#x_O%S=& zsY-FWUFK_%vo=rN;*w-WCMdOTO*KwJ>02U*3s9I98SY^05EF!5h0TgPCkSydotoOr!LX@Iug2jX0`+QDAue1;7YUL7!7)oyoZyYHxZ7| zyNialF0jr4hwW%^$*__0Fc!0M8$(W$$vDl*Hk+*pDgyTtQp>!|-AT?^$Pi>zrdU(a zBQ>;K$}ajb3Qafmqs`+Yf5K^7p0WA2ow~{@sF~#W&joU-LQPIa=7ZpdF+|&8C5->? 
z6ZmUBz*pB3bm$N}c)feH+4>s;2R3u{*OJM>R~nbJ4y2Q)NjqHo6&@gpRh2ZZb{ZV@7Kx7$Cb~OT-+UhL#cz2o95}nM5iEkF-n;TcmXmr^l1erCUVzg$} zyQ6ZDE@*->f^WCYbGITRVT?nLbR+6@0SOe%W6*@}oydf#(g^cPA>xi# zGUrkldkjLgixtF&gqd&hQRaEx@w$8=79t{X-O5~+(fEgN+Hims?hc|oH`S35ZW6T{ z)hge9$F3UF40_0biJ7Z4?111F%CW%gG-JyA)OqN$kDHtj3u=eFAi`CH0mA_ zQHe8oWRkt)s8AlO^JoRKN#$boLAj}+Pu$uax*#NdRQ2QpFo7x9AgYRKJ(W82yN(2% z4j-m*53K1*R}63#D0_p5&}g3H-ft7C57pPk6qM~SC1TA_z9+`&Ld64B@k1tfsr=`BA7W*W)zc#_P?icf5dmF#V z3i|&V0QhA5(-C3ftYS%I-gY4KaR4&yg88Q!7N6RetU>JK83W#FVyogfX9CggpXlLg zltKG`l#nd0P}S~I$L#RzB?o}FOHRSVYrfoh zNx1c#vIW|KRV>PIPe7+=4k3}#0yyCNf%5TL33Q<9L2+@uCWzHl3o#i?6o;IXhTih5 zSS6J3M0NN3Rv^8wulH21_)5TKcuE~A%tTOLP8idt=`@4bEz&rABa(Me>{fK{^ej`0 z_cJGY7d4fwZKn|8vE$bi5itSde-ZMd8pWsrp3Z+JE6qP$z-j~um191lK?Em(l3r2Y zpeppjNj3Iw;SwvgxMVqQefq(+aBG#D{6cp|#Pt)8(Ky!ecsb5)nAp_>)-m|rSnr$< zf*VruY<`By8*QcK@O-u4#j4z1;Gm$%l(#L_*6~}O$yLO}m8KS?pd$Qw;NwPgW0^GH zvlKadq&p#1O(VX_Uu);C(2xN!bah^gqZGKnGzN*}kwtpAg2ZR~Mp?_INT{W>0w9O< zCcB0;aB=3$;!O7YeX~CS9(TEo({GA9$hn{~=79FYLCQs^*-e-xEr#tn6~t2zBeMNp zWQzpxw-|B+3sA7&?f9q0JQ~Z7a6x;cKWSZq5*5SBc7N-C6O= z!jtqfdg#CbuV5HA58{FL8)$Y%kcpI;d2c1mD6FhlAtNeSG11=jK+bupU@{CQh!!s9 z?v~>#o2bm}N0B<&E>lEn@2pjeY-*BruQjh?YkO>rt|(cLx{SAo9fZskiS7R4p}fjQ zTMCm0NY(e;@zSNw;;oIg*etyMB;@zRc#p2Kn@*EAB!tDvFbuJ9?TE(NB`MWY4Kq`Y z!oj_UG}DC;*XvF2Rc5Cc??|?Lc9_jlj}VP~lL&aCA#tfV;~*ROk-u6BAfY&X>Cgh$ z#q5~55>*{4_c@+auNq=C)gU+PvA6H87vjA{KGI9ay%gUe`yS2%I+hsu;F;*Lhhq#bl&BcrSpG$8$VtX@zkRCqT{;@rXSr&b{|rE2_S zhi*e03%$|)QG$YCMVU_;5~zX1vb+cjQ@zS?fDib%>?c@EOeTQnB-GZ{O&cPF+FcvU+T75p?^aqm? 
z!`3`BbItQA^KsSXF)0vY|Lv&`URO{RY=r#{16-w0Asi7E8+Hs}<(B4U4Ybv2#8LzN*(%+GK51nUBKIZf{ z3Fb3Ub%fVCZ?){j(lf3bl<(s{dk1H>`+Eg=Z)_s=GZn$=_kxw`VA!RDm zMG_XHgJHO@gf*Zw(9!(yguo%gifRK;l(Rys@gCjGnSxrBmUeT5;^0`G`1GKqC8qe8 z#0xNiGwneT-(VC*HsGFmvkJj8#8T6iRWm5#ozBn9-DRTTW*B~U)&vuw7fEetglwue zZ;Vj3uV2wMp;|~%(CB$#+un(plP)<&0z$;}($RIXW+0KbsZcF-oB~ObcH;<|terWd z3U_5>5KNp7Y>r1wE6X|^uG@Nzfi3~!GF$4Y`Oqb+@Y>1Ks%4Gh_OF-2RF02Pyc7hY z(0{Fl2}uX^}2;MsV;aS{A_%%W3qw%uz0F!Gw)X^^*~xJ ziD1r{;yeV#TUtcWBW`rSPVoHG&0;5wFY5^qea~4e$R!L^6G;n!iW|DZM7Bq+THsUz zWK4dIDPF(}u$K5-WFB|)`a#BipB#`apcBh7T4W3xX zkb$Ae6sCY8>nN3vS5v|xbn5Y{Sp^=3H;nTx{8zOJO66i%?dkd^on@K`BsJT~`k^gJ zD>5X{+45vWD03{k@F|yU(#~fQnrg;}fHU`2#2ol#esf-2!}J*e)_aY3Y#{=6Ix}Mzq;+#7Hq_->nHkt&4f@1er2TXHjNvn~daCcm*g1`V2!EP<-%gR)Lxa`bRwAjy4w(?}C z{Ys1o81K>Vp{SXoQE40p!?j>3Q+``K0;~(7=gqE{nq&R zQwN3eRij}uHl<9;NgC%UTQn`+bQMeCJp6ProF9g9qX{wF2af-8**#^Xf8Hj0J~E$v zUBh|N+CDKnv~BJ;R;g`5_VebGip9Z$KY3Zx0%V}hmZ3e=CEq|}rDm_d2EcT*mie|9 z;NA}i$H*e4MGl6)>BI-qf)k|@7Bu6@5aP0OOrxNJ!5g=xa5xUFUQ}oqJ1?+|VbDMA zlKubi0u6=6;<}|m+F{Q?M4E)(A&Jv{f`urGz=p3IST!_Vm!{%GXX9m8$aUNpxC2tt zb4ce{T$tQ-vmlHp zlt92#9zU z>@U6;(nr}g5=?*&uzLxz&$Kv65%A7h&bRqNPpY+>$YC-@#d>Z&Ho-#IP_;YQbcvOH zrpw#M(DD~Dn@_MH?x2;qMiw} zUP&Z%CE#CZpl(-}{pb{(PPx-rE*(tto4QCQgtW{zbl5DA9X=N`5>K5?IK%-(kisLk z6^F5h%z`TdypHp$DnS#H~jXf-)^X#41eh2&l}mX^D5|aLLE20yHrq87{*WiC4X(17>JAYq*$7(x1UAy2|#oVq~7}NX5M<4su%+75N@6-mCafWb3UC z!k6ttzi61OjsYJSLM5YJDWoxz5Q_ku9#R`n52bjR09GSWE-BvMy9SBLSqdzIGEmTn zNKh-r(LmE~g*Y|?>t7IZJ`^k#M^pg_cr1tRO>ttH>hsQA?~>2n^#F9$H{qfBeua)v zjd(VCo=epx`90B@^Ox)C9CIorhRq`W1c52eK2|Fu&sG3R={rwpU@+gzKbJ@hm$C5xTS!|hK&cN$OdS@WIl$DJ$nyLXeQdVbRM{l|tkm zU9K5VVnw&NiTmzX-g%s&9lgN(=Pwu|k0=;Znp2UYIcSPWg5o)`;yjG^tdDY4%-6-M zJO8vI4P46=Gawcy*iJ12j|3J|o<;gq%+Ese>75Fh{4qvJg;&_c;Z&f$iPDtvC2i!_ zqA>4!M&X8f<*yi8DcWc!<52T=E!3ViZmECwhaX>?*}i*|;JsFWSkHJ-+D z4ZKiWZ>N_l4(>aEj7na(3m&xp7`8q4De$Vc7eok5h%vSS=6e{ODmM&o4e#9OM&guF zp+5f+E1pPT|5)1;_-u*dgmHM~LQih}nnna-uDyejvx-fi8 
zaEZWy#q76)=?q!JhC+=o)H+>!YQt1_xhIxTY(3CC6QT5nXwo#Nh$ky69qWSN_eC73CcIZ795syQYiDGA+;Ak+JuF9dTWUse{0i3|+_ zf!A)=ycPRmDkxbEJ_-dZqd+@y=Z(_*^5q=RqF9gw9L7sP-m+Pedh9Gosr}oSY>KpM zO<1%pj=1}jPHd4~y^G=z0N>e~!BmQ=9vidDL8W{VmN0~|R1wzor-b7#EY>yqSB;Dy zvIg4Wt2skZ7wGG-icVgdd>B#>K16VM-D}&pRR6FgR(PE2-1|ujPtpAfITug4u0tJB zPEDL$r8(EoRK?iWv{TORiD-J(atnfFg=a}_lU3>5`V%z)sB@O!fZ%|!Dw9`45fVbR zjp63hPTZY|IdrF_OPj4h9x5f~Q8>xyeeCvRC22`J!c(9~tnKD$8;z?5SU9ebm>?MO zmx>5xH{y`d~DvsNfeogUVWf?7l(mp^L4n0|B)Ji-WJD+j?DDaqgf9@(( z)PRh7xh*u3TLcpb?#gdVR&_rB<%$aepJUD?a_XjQf2bF(lQ$2oJ-K+H6Gxc&>xIZbh2s>H5 ztA2m{fP1n}wuNv3KI$hQxSeT7HWM(-p<47>8#`IUczOKB&#-@l-3`(uvL#mA)E30K zPh)iMgy+C8ejqZz#N>s!WXgBL(nu^4f4VT302>bgbVPn-j&3)~V~dUWO&UC7z{)!#3-(-nFon6UuLcktEPjHdU9qgay9a~ zaag7+rg#ibGTHY_t0F1xEPN*6#pD2@L)Cs<0c*a;*aEKl2Mxyv+Qy6tl zS5D9EQ<+wfrSS+3c;${osrn$mHXgt%l*$8!Vg*f?vWmJh5Q8wS6(@m9r8YNDQuWO+ z$Vf~%_>N;vW%tDtR?J_qxrf~ATIMXN=YzvP{;h*ZQe}BkNo!A z-AYvBuRERJumvmCVW!QnDs+ZAZC96M>WB&USvZs{S*EUMYerec5lZV6J{RliWb%z0 z*#u{J#srSJ(h+0aE!y`CgEZ(6IZhb#N`my}HrMBwZD!n8DX4W86JJoe#aD4x$3wR( zcs&Xo9d(~gTw4Xi_Y{P_MS3K+g?J(pyXv;2&VNZ6Ne;qPr9U0lh4Y1n)+7}hy5Xx%+X=0%muT`5Xb?Br2{C}BjGivXlBbLmZc5tQ4C?moV)j8p<2i+a3|7R3Nu5S}719A(fK(%U9I~ zpRp|^QFIelQ*%iY9(*SSjr|9^#BRHa%vigHrw-B<{o^lKNxtq3jrAA++q$RFj+2oY zYLy ztir}QXd*f9#eR zg5@#W5kAg=o@`;17O!g=#Lrb#wBoOm{>Y`81-zi(jcu;=Xgn{pKt!)^D4dZq(ilCL z=Z^Z?e1|{|c5C?8Ss&(7!<^W-?9WAErK%1Zdo3{G-cV(m28nGk1hlxKt?aTV+MT#G|I5qVp;v%O& z*r1>=6b&W~H7JER0_ne~g$aJ3V%n*Ain4AD8%k%LT$oE3Uq*k1BWSK>VA)siOKmiJ z_y}!Fjq-9#NJq=ZMg!|^r_=$^IqqX}(@-MTQ#(?9e#{S00JaGOK;-+?5+VqZ$QHN` zfYJ;mdb-y)$QfQ7&<;Pw-5?ISpNtHlVb2L|Z7Xb{vv%zwq@ou}@;TvE(T*XUk+0T! 
zKJ!RxO<{L3)k$DSkw&y?ep^o35w2RJa1Od&Q?z502XLxG$R))`*D{1vzXN4ckmT>(A z{P2;7l!zVD8?f6ch#JT2dfR`$qK)Eh*zMvl4xwQ=&mgcbvqh+c3UzdwRTf_dnw6>WU-&Rg!B+C zZBm2lBT3JIryAJICm%jxhk5%bm>+6L^<_^rM@{djy}unuew5qxJ5_G!4bNKbJhn3=2H zT(wadyFKBP9XdmJW{5QSt$0+G5=06T;oyda6L1*BukDv9T;4@R-l zSd1s>RAN|%)%mHcccFX^>{O(hvlUW2lb{DWi^WVPjmf=R76> zf6el^S{mFX12RHII6fb521R5XUI6p&fBeCfu3!L}3woujlD&^;6l@+_rVM-T*r4`IO65%QQok7M?xJ~uY|z~(t0J8lAGD#fpql` zOjJA3Ma!^)YEn%fd{Qd$6eN=B+s0U5QHn@$@G0J2?RZG(3~-22QZENMo;WN;2X5Uz z0hMbIHy9yPGT6rx$sGDV%#Stf4Ha<1P9Kjs*f;GXF{ZcE6-H{&>12Dqt>3SPMZ-v; znZKrg&Dh2zj7%7)7oFz`>JQ)?_1{NGJmn+n9$ay46lKq8+X8;zfZDei8#>`N_K37-@EvHU<2nYbZ ztcpG5Y!M);6g6rgt?q>kjYOQ+LKhV_{YsHh6xlVMS$`{|@L4D;MkF>6V<(a`D2DbR zk~-(zU)ulO7plwfc>^UV0?w<}I%Z7RCd0R$NUfF^YGlWYgvRO5FxxVJy$N%R6E_M( z0nkekZMkL89d{;`myxmk&2@Jf^o#}c_7+I+-0Pp-*A)XTvmL^*xh(KwHpICxQS0#Ot@ahLBd!j+_uKmkZjuj!Zsn$x?%uZ|8B<1_(MPK~phxmA2RJ zlND0nVC(I1B|+N0a@7Ftu> zG{|sjm?{wm40ReE&%nB7Aq{=?gTbadEQ__H1UrI|^%z#`?|W`ii446|H){7o8`va4 zUT+$b>@ZdTV9ir@!dG{VP_>A1`hl%ADE1Tjf9_|eGs_X{Ir{S15K^~Qv7}sq75XgL zUhzeqcd%fuUqQz(U?C)U4D z#A)jzJ90P33e1qY8SWv^5OK!z};WiCKhO zg_kO*OZa64#hpou&1MI}vZyuq> zoxCX=m(`lu1}mY2C0Q)W^PBD2sf3izoA@%ps80}~qWe7$xjkoC zpb&!ztwL*V7aV<J96so#G=U|>^@&&-sWx5b%ZelzJht~reRZVcQ|u-G&lVLFYpPh* zkE7<5?WtO3^EBaSx}ZB_ANmzU5^OqN0h>e$ur53*Y z99Eq@eAv0nW8_vKVi~%~PB`@0+oDcA0IbhLw`^j7fYeb`GtuE`w5HLU|55c>poH6i zj@>B6vFlFI;0SuUE_TG8{Gr?Q6zgoM?K z(reP`VXa59V`1YjE@NtI5aWU0^bk@3tzxOHgj}`3Uj++Go-a1aNthz!=`(aSGi+95 zJ0taok`U;oqYO5<_Lz$;!Sm?xxb_vki>%o)-p8rtHE=6-sy4_u-1bgVjIlOYM6$*! 
z$^WJG)!Z2Pj%?ObRN=oBXaU_2<`+&gR#b}s0w^QA=?1d(e5g@KcBzm?7SJJJ4*a4jgrL|m@q3^s&#$9Qe0!HLJvKog zN2Q*4I6aZH31j=1NX$w{LlEHKf!}vki%(L|OXQzcCrBlb<);mO*q4^-Bo-;XJuqK} zuUW%?(UL@tacAf{^Y&->73EVozBoDu9j*90H07l;@dJUKAhVM+=XqtV<;DYw?uqpY zf0ays^ByG2*jE}?+`qU?{%|t6hJ>+gH&+=WCnD}kXVi+PApZxti*x!2TfnvxCT5Pu zYW)atCX_~q+}qMS`M5t+1GO>;1&}%>aEk^3nv*bQ$hY6zFNIBx4o%nlIzwe^t9Bo( zvNMJrhYc}P6mhj}HLTf4MII1Bn};LhcuQqVsUi!vpkH)UF|&P?c=d$R z`P~9meEsMfz4Kix;Y7>zA9SpK-X?dI$?uCCY=UMpgQGrKRM4?W$OV#>ZeM@|fSj-; z+uY4dm?Mvj7A(e^IfSx4_5-TljoL1_(sTYR=D+^ZL@!YzCvwhMF+)>Vk|H+|v!6RC zyk4!A!UFW$ie@z4v{R#Q5A8{->PMmpF5Dj%%!`ShIs1j2l=GclVU1)tai5h8NdCGq z`(&=NYi~e^T-6MX@Sm5;{*g+;JVcaTi(L*$vH6uE#tW@Ae5o@%fQaX`X0z{UR?i&$ zTUXl=rg6${N1>CtP#$P}Gw$-H6!%mGDbRLghFu>4tngnQ$WOf!8 z%a)$_^m~Ab??YFeVg5oOI8HL0san-HX?6W@S`ku=F>NhsX6VXm;R* zYD1MSk1qfeeR$M3R`Mqjia?JjU-szE1wWr=Z&1!2?t46}<_2+GyXCQV@}!xP2fFJy z-7>?WF>9<*X&*P(BM2e=W>4B%;u(4@pQ{|8@{QzPWdl&#=_6$S`fwq&>}5>;8Qj;T~=@zLOLO5zlS6^I1gD(Wz5OG^_zPSqNf zefH6oQb^V#3X6kMrO28kO@^&SIc#JkxTFDLpM0FGB05a;B+3Vl(+iFLGMFYg22m23{OJD~1vxT}s$bFm#%la=wMjOitUSe5hd}YRY;dv@xVzX;#q0$Y*+q_@ph40jbZraicqf0b<(&m; z*RmN|#qt)Bt#q;oA*EUAS8PC{W~bDi0vh<=XtMyky-l?8NpO`Oa1}%4OiLoGkz_Xb zZtRYZOR6GAH`hV#@4|VI1I4%7218?UMfia!Y#1rd;ZULY_M+**7;LhiW7eDO8!H5u zb;-nyHsH92K8|Q7pHWajiw=yd9Oc__hqx}xpVM*Rc4yh!s!sDyph14Eju5d9TY%}A zy-AeXxM2^&o&5kIH=H^IkHe@5Uc@k4>bdG) zvJCbe*%W{xket>i zsr>vfBde9M#eGQwqpSj*Qx)vyXjM2kU-Gs)&aD;=g|w}GuTnXR9HlFRtKO_4jW*=8 zAzHq0LM`ICCZ@&0(Ikf8J%G#4Iq_7+6zv}uz|?{nylC8$m``p}GuK*xh;VR6NByj+ zdqAfdqt177gQz9AmDU%O|4Im@QDO1QE>`^vAR$fh(Ut5`eM_NDt(#HF-t+q8s`ATq|^j)45dHc$zM-ZdMP`npeH>cPQxt`}-NuSGfIG$Dx@5vDP}jip6K8 zLJ#U`gd%LB*gKp){Z4dImMq$%0ZU=N`#>A0f00iN&8B;rsb*f$OySlVQ>ljV*&{5z z($qxo&9n9Nu<(hTGN9bsS228a=+R_k*DncvKazj0Ae1TR#6+@XZS$$7HALV6e_kbZ z^peNya*p>-Q6+#4bZmPwDVMI2{-}5o>_v9aHTQUFf4pMMhS$i0&gT^4J1G;yOQjs; zAhlZ8K#lhUKJh}?CAk?uFM7ikkZi0(4Nkf9;_q40|A@I}0w>Z=IPvK9*jaqG(UhO6 zl1BaN#R*88@+~96$zMn^LQmVZE`_=le99WgigVK7_8yuSa?wdO9;~4W59F8etzQoP z7~uZ$)2jF?;5s^k1>P^zHdoY_Uis4(>vWnHq3GH?gZh< 
z8Y6qq&m&a#p958AUrTeO32RZyR1iHnj!Oqy7vqmA*ew2E5Y6Egkg_;xK!l@eaWEt1 z9<*po$zE^)pa7aDIAxU5G~9EIaUfKzD`9B^q~S^?f^Ar2dr~K67pUF|<*|aLd}rC= z_Ka@h%H37ZL87F}Txl$E1AshqoL{|s}Vnf z+ZG_Wj}G7Y!7?iVR714GNzW9`GDUvoE<)KXxxO5dRgID6QfNhKvK5e)w?;e46t54L zbij*bViRrZ-W56Ek`&iRMgR4^yyeGo81N^s!(y zP$YL_x^-G%mykbExMWz{E>|-ZMkB%$mS#>gED$14UI?LWbt%1T!#klWBO%k_H{|gf zkS4VX3pWNgdA~~sI)ZhPkq2z2fWq9f_78lUvuidl5h5lVjp6wPgHu2W622ug8AFIU zw~7d<{wpIAR#l)hvpPNW_~MVljw`IDzGdb22q8{2UI&O<5$YTtm8n%0p@xSIAIk z6pR%WiV=ktp2S2*qJ}EeNmw-(BQ&u^v{WFo_*+Jz=#RDIej>gR5mG2}@Htc%^kXs5 zT!*@a{5N?N+Nw|5^T~zhv%s6rp>E1&WWmBsCPEKDe0N;yz^{sqC62Zl6;Hf@v1C}@6dzz3WZkwkYgZCpicbG zfi8UKI^Nw>#odWj*&S7=nIOlgyQzdblN0iPal6RPcE%o!c4tFjH>9KN?p~1H%>z$j z>>A7`b~IPxs^Fn$-OL5+Iul9~R~LSDQFTOhE(^!T)P*|{vpd~{lISXoRG=kF*CX1o zVRUq5_Mw}p9l7y&8-!EE1|Oqy0raz0IJsG^mmn+6#8`pg$mE?6EAN)8V~mzn?LK8 z@~sqP-Zb($IusjX0)e%ac*(QD(v~I#M@H6bN2PMRz;MoE+)DMj8S z(Czj{f=Iezd}WDptm7R;T5-pEUD%OPM=Lr~7}p#{ew6D&DQ??Djjkq?88r6J=w>_# zMdNs4jQyumBU1b8!9y3%Zn4Pg6>T=GC?g*dc{Thbs6<##QFs}msDv^ zuoq}|^tg;JiK=2Lh^|r${Rfc?E&R_GuFOYwZZ+=geCJRuI#W;1y_>BbtaX#AdZ0MY zTvcHvfjChn^T}M&shI1Gm#xTUt3}JMV|i{@F59F;TxF>WD*JU(wokaC38sa~Wn(0Z z#gUDCTNF{1A4DFz#gI+J9|>kDb1Wm{tK0Zetl*NM@!iLE7h63nwk0aI2E|Sy4niir zRL?NsF*f4_GDG~{ zd__l*KiacwC$4XxhqWH)Rxd5}WHlY>$vGz+EtnGKYvt!AuUMCrOylu9g&_}LEr;+n z{(Z)d=iXlwcen7C-R0^|dIuemfi9K1h)hFGV1@0B+J_`TyHUAplzbL?5u2l*gwe@(GZMWgaRuXr z$P=HRJ-8E{iwG^WDmWJ*hPfDDpPQbuCXg62mlGqA3WNNDuq?lc%2^HG;^bjAa^X$5 zPwzzJLu~veOO5**PT)=O{^ypOiMoqOUQ^3$(iQdjXwOg_PbAnLDw}ih#}}x=|GMLF zMJ&;Jf)Irvhr&8{Rm(WA6;yL7h2p3)Ci|3!ofNGx2U8G0Ab>zX7#tQ7QEV8C1bh*Z)d)aO3Ek_#@Jcp~j1g*xg{I(u z1ZnfP+j87%!|jdJRi^vvb|<;Xmn704_teI7H%eCU-wYlz(5jJ-K03%CNX+d z90|H&udf;N@ViP!y5Nnj3I@Da5N{&b?^{4x;6>w4P))hrsT+^FD!hjq$zJCSRr<2` zIzWc=6#hCx2G|sQ6;O`INiUzjGP2CwD^@+o7+eR0fgy&vy&W!&RpXJ@pcdMu(bHyO zLu+6tm48LMU4SuIXeK7imudsfW zmcy1d-y9=(J}DO}UJs3qHr`!0P^&#v4bR?F zsXuDj!5pn>w3s=KCF%Xj-x2-bT-o4W%7~~TIqcZ8oZKpd)<>{*O7V-roAA@S4HAbH zg;*(@frrqAwP)0z?FfmsLpjf7~Pl0vGwjn86DrLL?(w1S33;j^`hh;!yXG<;2; 
z5i$@XC`M+A=Lo<(O#oNyMo0Za()w3Cjv1;VWDq+ZN*^GgS&xHTzpv=|gfi6)RdDJc z63o+E{rJL8EG}pM|2fh9rYAN05we=o7HYX1z{a>p31!0A-`xWU>MH`+(O?x*k!2Zw z$(|oHMCiduaORO^W$s>yOcI!g7duU&?di@#L57C#sP1k@?jEMA8%L69b~k7dD&={0 z3nJneq`q3u1l5rf3XA_JlP&V!`s5S0c;v0xxaU;Us|!$3H9gk?K%|)4Xhi8ZG!ghS z%O`5GzO7T1{2bGhiu0`+#q05)z-%ikpT10UNt^~wZB`{xZKZ) z`Krx)m+eX|mIE^5;YzlnMMRcZfp>9n7w>ybt*5_dK>rO_fa%2S#tY2N&*Go>jA(+Z zH~y%IAsVkc<#!2!Inpqvmf9llyHgqD=r6+w^iHj=5KP?nu^j|gev;zOuzU}*;z108f_eSQ8olcXH@F{%5) zI%zMFqW|BgcGnB!SF&%u*{l@jIoXS2PI@!o@SDl(bZ2Q%F z__tRS-00&H_KqJk0pAHA_Nr&Bs4o@y36l`!0<;YR*|WcB5u&Lt6p7N9mSwtMUb>Qh zK3|OY>oZIg(K=x3m*5`uXC2K(dCL1l6f3}|YG)e9^S8d8?>c*jE73&gbIkok3K|GJ z`U`NaYhl~0iR8YNeGf|J<1r1Y4}=gv2djs;YIus&y0MCHDx@I6fFRqEc+Z&nhuX({ z4LBzb!GZ2N?L5PzouF+L>#k(1+3QB5q3`>bf<+$hF_gmvMd%Xq&tG(~QFZ+ayKf&Z zQpqMRSa|M1N~N`9v-1O%cBlZV*phkyG<;A=q57ArzCIvy>c{IKiWqZG7fL|L6v1?)BG6--hJc_uBbEXr&c5KIdh_Ga{@3=KQekN!0Tr zAeH}HD=WRtI}a!=QsJ0-;hY(K^fAwF@YALCP_@jcq4EB>t$^AGZQop(BO8<`O1YAE#`L3q?e5`XW19SXc}{oRej-bz znMcn=Z8IZK*iC`Ut0cLp2{g0`cw}LxfAN~JGlhKeR|pv^U?FNs<{P{EQ1lQ26WwSt z6FjL+f$OXu-8yhPy__BwG6Kg12(xOD!z|i;SKoaVTih1M(L|AJ3P(D*^F>ZkW=z;W zP@e?PmJKJvCba@gq4h@#?xyqF)wYG`pRl-&aAtD7yFqaP$1(MsgwzG2>s8g#EW*-F z5yv%yc)#q3)-4Q38xhZ?5tVg~)=?cUR)0x`by_~{8n_6{w3^#k^C}W+8 zE>ZRZH+e`d9R{gjzs#tB7z<4Ufhk`f@FX9v^2z)EDbt=Oj>q{-QwBQo=G%5&v#SL4 zjO)e2JDc7xEDusbJSl2#J<4oQFh_Wsz{*OJ(bO1~uDXjA{s8XPfYth;I9AO#z8G}k zTe5gg`k6)}TJl^|Q(6cqzuY2EIs+-jF*Jay&9n$lkM&yQRwZqu%3}_RJd{VecVEO@ zPO_|*?GLu-Zzz^opQlj0n*zx6)yC8=Fyw3;R3oyFu|i%SJs$Aj_nc1&k?0VYes6jK;2T|vlTTYZfw_cukRLwh(ywweu2<_Ff0p0A zv(g;g2}Nnsj9oIGPvWR-sN5Ne77^(u2J{HZ7+~E`5OOqg(GMfo;wZUW6uc}xfmbJ| zW?6Pohl?UuQ8QGQBq+yVnj`9g}cyJ>krd~ChN!A zwSj&rx<(LCq=FJhR0{#~jKaQJ0PeoH35U+!tdK|cVU~mZbB>s>w@xNT^qbKqPlcQl z#W=(fm>6?Uo zc!9?IMVKW8pZh^$F(un37onA5)q8IW;n7buAvTQ8qDAgP-bwDOhWHfRHuDTHgmoos zjWIw6OA<}nr<_@bqf9cGfZE$WBm;j?6puX3hZmP4BU5nIBq}mF_i)|hEokS$$suz= z0a}V6tCIfc5UQFXE(d9Gr$RMN^NJ;$KeQ>QMd_*!V@zb)V>{4)j3_@v;9=gn*yuL@ z0gXZktUVB>zIMXZGt1}efHY;U?RU!g8%ASnvvpNBaDP@~!jEHMN^ 
z+u&EK$d4IkeFIW|-F&(+@_z#2NwXCc)uy<-Mmp<%%#MRHCe5cVs;us}?sew$wv?+* zs40TH=;*4WD7d}`E?r(m*Aq%Oz@W)vlJ!w29odM~X(#aFRjEf>ml;lD8*1w^5{A5& zA!}-cq%*km<-?eHp+<8qOjH5I>jG$pt!i8&Lmtfcjruhx^MNnbO9xkKc%74#FFvfM zP>=@m(2XLI0Tv48^or6Y33BCCX0J*K6% zeA}2-8sN4!6a&66cr$;HR(&e+Js>!dVuIq#`umXq#ZSFR8=8pf|MlVccu1r(QMter z&&0t`yjmeK!8Moqi$pyR>GA>=eH%m^T8>>sh_Bd`(7;z|gw+2?6jEmj5f8zjl9$4o zIK%-ZMQ~-HKGU%2d+JBk8HSV=SpJ9BVRwhhYS0Y0z<*^SyRG4OeF>0mWls}wW~OfD zwbFYE)1=uFT() z^PKR+!P_{N?x=f}z(-IOQdXJ;3pNRDoZt!ZsWMcU|HEqmz9L1U8)rtZPrLhPI8L7? z?_KBKw~fJXroGfit60o6^)qa|Ad*ZULcr1+NKer?6q!!lK-Dp4HFwG+dcJ74X-sC5 zK%qqxquJaUemu;#cwtN3^F(+j+o-N2aUz>-(tPX`gwt&SuV|DA#c_xthD3{-yxkCt z8kwjLz1qzCK~SBUQjLm@r`_?%v3?JP3i(RZte_MGXGDtigX_uNdvH_xt5c+`o@9i# zHnm_3OP~#VNtRfk|31c(v|Q;GR8t>1YHC z{sH>8BrdYdKrY_^9L1DM#nGXN2OWqp)swdX=1A-3dx`mq1X%Kff$GTd#ewDwNk7`+ zh54pFD42tiT$LBD%sxV~MGAlv4wpBBff)lBjmFVYz?3~d!eRvCJBR%v%A2wSF&%`q zt0u6r{V4g4_l!I$A5UGO2xBs=IEDTvo^p}4++j*$VZL~L`6NRO^)Vd3&*Sibz)ze* zMbVvJzm91b{HQy@fes4Y5IS1Hv>Su~>aTK`SOJY+7K68hA*fBedM1Rbe-N+$jih>j zg9&>^eC?sEWgz}o=Ohj(+WZH`SA-=iQ}889OwqM1mJDEv7$Q@J7II$#(6Ps>k$t6k diff --git a/test_runner/regress/data/extension_test/5670669815/v16/ext_index.json b/test_runner/regress/data/extension_test/5670669815/v16/ext_index.json deleted file mode 100644 index 1157e0d032..0000000000 --- a/test_runner/regress/data/extension_test/5670669815/v16/ext_index.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "public_extensions": [], - "library_index": { - "TODO": "We still need PG16 extensions" - }, - "extension_data": {} -} \ No newline at end of file diff --git a/test_runner/regress/data/extension_test/5670669815/v17/ext_index.json b/test_runner/regress/data/extension_test/5670669815/v17/ext_index.json deleted file mode 100644 index 7990b2c3a2..0000000000 --- a/test_runner/regress/data/extension_test/5670669815/v17/ext_index.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "public_extensions": [], - "library_index": { - "TODO": "We still need PG17 extensions" - }, - "extension_data": {} -} \ No 
newline at end of file diff --git a/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0--1.1.sql b/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0--1.1.sql new file mode 100644 index 0000000000..1fb183dcae --- /dev/null +++ b/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0--1.1.sql @@ -0,0 +1,10 @@ +\echo Use "ALTER EXTENSION test_extension UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION test_extension.fun_fact() +RETURNS void +IMMUTABLE LEAKPROOF PARALLEL SAFE +AS $$ +BEGIN + RAISE NOTICE 'Neon has a melting point of -246.08 C'; +END; +$$ LANGUAGE 'plpgsql'; diff --git a/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0.sql b/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0.sql new file mode 100644 index 0000000000..b51e3ed19f --- /dev/null +++ b/test_runner/regress/data/test_remote_extensions/test_extension/sql/test_extension--1.0.sql @@ -0,0 +1,12 @@ +\echo Use "CREATE EXTENSION test_extension" to load this file. 
\quit + +CREATE SCHEMA test_extension; + +CREATE FUNCTION test_extension.motd() +RETURNS void +IMMUTABLE LEAKPROOF PARALLEL SAFE +AS $$ +BEGIN + RAISE NOTICE 'Have a great day'; +END; +$$ LANGUAGE 'plpgsql'; diff --git a/test_runner/regress/data/test_remote_extensions/test_extension/test_extension.control b/test_runner/regress/data/test_remote_extensions/test_extension/test_extension.control new file mode 100644 index 0000000000..826f643daf --- /dev/null +++ b/test_runner/regress/data/test_remote_extensions/test_extension/test_extension.control @@ -0,0 +1 @@ +comment = 'Test extension' diff --git a/test_runner/regress/test_download_extensions.py b/test_runner/regress/test_download_extensions.py index 2ff525464d..30f8c65cbd 100644 --- a/test_runner/regress/test_download_extensions.py +++ b/test_runner/regress/test_download_extensions.py @@ -2,26 +2,26 @@ from __future__ import annotations import os import shutil -from contextlib import closing +import tarfile from pathlib import Path from typing import TYPE_CHECKING import pytest +import zstandard from fixtures.log_helper import log from fixtures.metrics import parse_metrics -from fixtures.neon_fixtures import ( - NeonEnvBuilder, -) -from fixtures.pg_version import PgVersion -from fixtures.utils import skip_on_postgres from pytest_httpserver import HTTPServer -from werkzeug.wrappers.request import Request from werkzeug.wrappers.response import Response if TYPE_CHECKING: from typing import Any from fixtures.httpserver import ListenAddress + from fixtures.neon_fixtures import ( + NeonEnvBuilder, + ) + from fixtures.pg_version import PgVersion + from werkzeug.wrappers.request import Request # use neon_env_builder_local fixture to override the default neon_env_builder fixture @@ -31,13 +31,13 @@ def neon_env_builder_local( neon_env_builder: NeonEnvBuilder, test_output_dir: Path, pg_distrib_dir: Path, - pg_version: PgVersion, ) -> NeonEnvBuilder: test_local_pginstall = test_output_dir / "pg_install" log.info(f"copy 
{pg_distrib_dir} to {test_local_pginstall}") - shutil.copytree( - pg_distrib_dir / pg_version.v_prefixed, test_local_pginstall / pg_version.v_prefixed - ) + + # We can't copy only the version that we are currently testing because other + # binaries like the storage controller need specific Postgres versions. + shutil.copytree(pg_distrib_dir, test_local_pginstall) neon_env_builder.pg_distrib_dir = test_local_pginstall log.info(f"local neon_env_builder.pg_distrib_dir: {neon_env_builder.pg_distrib_dir}") @@ -45,89 +45,92 @@ def neon_env_builder_local( return neon_env_builder -@skip_on_postgres(PgVersion.V16, reason="TODO: PG16 extension building") -@skip_on_postgres(PgVersion.V17, reason="TODO: PG17 extension building") def test_remote_extensions( httpserver: HTTPServer, neon_env_builder_local: NeonEnvBuilder, httpserver_listen_address: ListenAddress, + test_output_dir: Path, + base_dir: Path, pg_version: PgVersion, ): - # setup mock http server - # that expects request for anon.tar.zst - # and returns the requested file + # Setup a mock nginx S3 gateway which will return our test extension. 
(host, port) = httpserver_listen_address extensions_endpoint = f"http://{host}:{port}/pg-ext-s3-gateway" build_tag = os.environ.get("BUILD_TAG", "latest") - archive_path = f"{build_tag}/v{pg_version}/extensions/anon.tar.zst" + archive_route = f"{build_tag}/v{pg_version}/extensions/test_extension.tar.zst" + tarball = test_output_dir / "test_extension.tar" + extension_dir = ( + base_dir / "test_runner" / "regress" / "data" / "test_remote_extensions" / "test_extension" + ) - def endpoint_handler_build_tag(request: Request) -> Response: + # Create tarball + with tarfile.open(tarball, "x") as tarf: + tarf.add( + extension_dir / "sql" / "test_extension--1.0.sql", + arcname="share/extension/test_extension--1.0.sql", + ) + tarf.add( + extension_dir / "sql" / "test_extension--1.0--1.1.sql", + arcname="share/extension/test_extension--1.0--1.1.sql", + ) + + def handler(request: Request) -> Response: log.info(f"request: {request}") - file_name = "anon.tar.zst" - file_path = f"test_runner/regress/data/extension_test/5670669815/v{pg_version}/extensions/anon.tar.zst" - file_size = os.path.getsize(file_path) - fh = open(file_path, "rb") + # Compress tarball + compressor = zstandard.ZstdCompressor() + with open(tarball, "rb") as f: + compressed_data = compressor.compress(f.read()) return Response( - fh, + compressed_data, mimetype="application/octet-stream", headers=[ - ("Content-Length", str(file_size)), - ("Content-Disposition", f'attachment; filename="{file_name}"'), + ("Content-Length", str(len(compressed_data))), ], direct_passthrough=True, ) httpserver.expect_request( - f"/pg-ext-s3-gateway/{archive_path}", method="GET" - ).respond_with_handler(endpoint_handler_build_tag) + f"/pg-ext-s3-gateway/{archive_route}", method="GET" + ).respond_with_handler(handler) # Start a compute node with remote_extension spec # and check that it can download the extensions and use them to CREATE EXTENSION. 
env = neon_env_builder_local.init_start() env.create_branch("test_remote_extensions") - endpoint = env.endpoints.create( - "test_remote_extensions", - config_lines=["log_min_messages=debug3"], - ) + endpoint = env.endpoints.create("test_remote_extensions") + + with open(extension_dir / "test_extension.control", encoding="utf-8") as f: + control_data = f.read() # mock remote_extensions spec spec: dict[str, Any] = { - "public_extensions": ["anon"], + "public_extensions": ["test_extension"], "custom_extensions": None, "library_index": { - "anon": "anon", + "test_extension": "test_extension", }, "extension_data": { - "anon": { + "test_extension": { "archive_path": "", "control_data": { - "anon.control": "# PostgreSQL Anonymizer (anon) extension\ncomment = 'Data anonymization tools'\ndefault_version = '1.1.0'\ndirectory='extension/anon'\nrelocatable = false\nrequires = 'pgcrypto'\nsuperuser = false\nmodule_pathname = '$libdir/anon'\ntrusted = true\n" + "test_extension.control": control_data, }, }, }, } - spec["extension_data"]["anon"]["archive_path"] = archive_path endpoint.create_remote_extension_spec(spec) - endpoint.start( - remote_ext_config=extensions_endpoint, - ) + endpoint.start(remote_ext_config=extensions_endpoint) - # this is expected to fail if there's no pgcrypto extension, that's ok - # we just want to check that the extension was downloaded - try: - with closing(endpoint.connect()) as conn: - with conn.cursor() as cur: - # Check that appropriate files were downloaded - cur.execute("CREATE EXTENSION anon") - res = [x[0] for x in cur.fetchall()] - log.info(res) - except Exception as err: - assert "pgcrypto" in str(err), f"unexpected error creating anon extension {err}" + with endpoint.connect() as conn: + with conn.cursor() as cur: + # Check that appropriate files were downloaded + cur.execute("CREATE EXTENSION test_extension VERSION '1.0'") + cur.execute("SELECT test_extension.motd()") httpserver.check() @@ -138,7 +141,47 @@ def test_remote_extensions( 
remote_ext_requests = metrics.query_all( "compute_ctl_remote_ext_requests_total", # Check that we properly report the filename in the metrics - {"filename": "anon.tar.zst"}, + {"filename": "test_extension.tar.zst"}, + ) + assert len(remote_ext_requests) == 1 + for sample in remote_ext_requests: + assert sample.value == 1 + + endpoint.stop() + + # Remove the extension files to force a redownload of the extension. + for file in ( + "test_extension.control", + "test_extension--1.0.sql", + "test_extension--1.0--1.1.sql", + ): + ( + test_output_dir + / "pg_install" + / f"v{pg_version}" + / "share" + / "postgresql" + / "extension" + / file + ).unlink() + + endpoint.start(remote_ext_config=extensions_endpoint) + + # Test that ALTER EXTENSION UPDATE statements also fetch remote extensions. + with endpoint.connect() as conn: + with conn.cursor() as cur: + # Check that appropriate files were downloaded + cur.execute("ALTER EXTENSION test_extension UPDATE TO '1.1'") + cur.execute("SELECT test_extension.fun_fact()") + + # Check that we properly recorded downloads in the metrics + client = endpoint.http_client() + raw_metrics = client.metrics() + metrics = parse_metrics(raw_metrics) + remote_ext_requests = metrics.query_all( + "compute_ctl_remote_ext_requests_total", + # Check that we properly report the filename in the metrics + {"filename": "test_extension.tar.zst"}, ) assert len(remote_ext_requests) == 1 for sample in remote_ext_requests: diff --git a/vendor/postgres-v14 b/vendor/postgres-v14 index b1425505c6..7b7592e740 160000 --- a/vendor/postgres-v14 +++ b/vendor/postgres-v14 @@ -1 +1 @@ -Subproject commit b1425505c6f9a622a5aadf3ee362740519993310 +Subproject commit 7b7592e74059f795b64f06860cea97673418f35e diff --git a/vendor/postgres-v15 b/vendor/postgres-v15 index 533be42f7d..ee794ba767 160000 --- a/vendor/postgres-v15 +++ b/vendor/postgres-v15 @@ -1 +1 @@ -Subproject commit 533be42f7da97e614ce1c494fafe3e49f53991b1 +Subproject commit 
ee794ba767eef9b10260ef67d3a58084f1dabd6f diff --git a/vendor/postgres-v16 b/vendor/postgres-v16 index 78050f965f..512856aaa8 160000 --- a/vendor/postgres-v16 +++ b/vendor/postgres-v16 @@ -1 +1 @@ -Subproject commit 78050f965f2e550fd6e58f837394cb3d080d7d42 +Subproject commit 512856aaa8bedbaa8f06811449518dcb0c2e5d8f diff --git a/vendor/postgres-v17 b/vendor/postgres-v17 index 780efda2ef..e5e87b9f52 160000 --- a/vendor/postgres-v17 +++ b/vendor/postgres-v17 @@ -1 +1 @@ -Subproject commit 780efda2ef8d629495cc289624534ba8cde40779 +Subproject commit e5e87b9f52d0eaeb83f3e2517bb9727aac37729b diff --git a/vendor/revisions.json b/vendor/revisions.json index 1a811cfa3d..1d76e1da01 100644 --- a/vendor/revisions.json +++ b/vendor/revisions.json @@ -1,18 +1,18 @@ { "v17": [ "17.4", - "780efda2ef8d629495cc289624534ba8cde40779" + "e5e87b9f52d0eaeb83f3e2517bb9727aac37729b" ], "v16": [ "16.8", - "78050f965f2e550fd6e58f837394cb3d080d7d42" + "512856aaa8bedbaa8f06811449518dcb0c2e5d8f" ], "v15": [ "15.12", - "533be42f7da97e614ce1c494fafe3e49f53991b1" + "ee794ba767eef9b10260ef67d3a58084f1dabd6f" ], "v14": [ "14.17", - "b1425505c6f9a622a5aadf3ee362740519993310" + "7b7592e74059f795b64f06860cea97673418f35e" ] } From c87d307e8c41207b62f0c1b4c4aa93967f981c09 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Sun, 9 Mar 2025 20:36:36 +0200 Subject: [PATCH 129/207] Print state of connection buffer when no response is receioved from PS for a long time (#11145) ## Problem See https://neondb.slack.com/archives/C08DE6Q9C3B Sometimes compute is not able to receive responses from PS for a long time (minutes). I do not think that the problem is at compute side, but in order to exclude this possibility I wan to see more information about connection state at compute side, particularly amount of data cached in connection buffer. ## Summary of changes Right now we are dumping state of socket buffer. This PR adds printing state of connection buffer. 
--------- Co-authored-by: Konstantin Knizhnik --- pgxn/neon/libpagestore.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pgxn/neon/libpagestore.c b/pgxn/neon/libpagestore.c index 49f12bbb9e..637281fe4a 100644 --- a/pgxn/neon/libpagestore.c +++ b/pgxn/neon/libpagestore.c @@ -16,6 +16,8 @@ #include +#include "libpq-int.h" + #include "access/xlog.h" #include "common/hashfn.h" #include "fmgr.h" @@ -815,9 +817,10 @@ retry: get_socket_stats(PQsocket(pageserver_conn), &sndbuf, &recvbuf); neon_shard_log(shard_no, LOG, - "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d)", + "no response received from pageserver for %0.3f s, still waiting (sent " UINT64_FORMAT " requests, received " UINT64_FORMAT " responses) (socket sndbuf=%d recvbuf=%d) (conn start=%d end=%d)", INSTR_TIME_GET_DOUBLE(since_start), - shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf); + shard->nrequests_sent, shard->nresponses_received, sndbuf, recvbuf, + pageserver_conn->inStart, pageserver_conn->inEnd); shard->receive_last_log_time = now; shard->receive_logged = true; } From 66881b439408a1753733e3319350ef948f330974 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:33:00 +0400 Subject: [PATCH 130/207] storcon: update scheduler stats when changing node's preferred az (#11147) ## Problem `home_shard_count` is not updated on the preferred AZ change. Closes: https://github.com/neondatabase/neon/issues/10493 ## Summary of changes - Update scheduler stats (node ref counts) on preferred AZ change. 
--- storage_controller/src/scheduler.rs | 77 +++++++++++++++++++++++++- storage_controller/src/service.rs | 7 ++- storage_controller/src/tenant_shard.rs | 41 +++++++++++++- 3 files changed, 117 insertions(+), 8 deletions(-) diff --git a/storage_controller/src/scheduler.rs b/storage_controller/src/scheduler.rs index 817cf04fe1..3d5f36fb98 100644 --- a/storage_controller/src/scheduler.rs +++ b/storage_controller/src/scheduler.rs @@ -409,13 +409,14 @@ impl ScheduleContext { } } -pub(crate) enum RefCountUpdate { +pub(crate) enum RefCountUpdate<'a> { PromoteSecondary, Attach, Detach, DemoteAttached, AddSecondary, RemoveSecondary, + ChangePreferredAzFrom(Option<&'a AvailabilityZone>), } impl Scheduler { @@ -578,6 +579,14 @@ impl Scheduler { node.home_shard_count -= 1; } } + RefCountUpdate::ChangePreferredAzFrom(old_az) => { + if Some(&node.az) == old_az { + node.home_shard_count -= 1; + } + if is_home_az { + node.home_shard_count += 1; + } + } } // Maybe update PageserverUtilization @@ -594,7 +603,8 @@ impl Scheduler { RefCountUpdate::PromoteSecondary | RefCountUpdate::Detach | RefCountUpdate::RemoveSecondary - | RefCountUpdate::DemoteAttached => { + | RefCountUpdate::DemoteAttached + | RefCountUpdate::ChangePreferredAzFrom(_) => { // De-referencing the node: leave the utilization's shard_count at a stale higher // value until some future heartbeat after we have physically removed this shard // from the node: this prevents the scheduler over-optimistically trying to schedule @@ -1535,4 +1545,67 @@ mod tests { shard.intent.clear(&mut scheduler); } } + + #[test] + fn change_preferred_az() { + let az_a = AvailabilityZone("az-a".to_string()); + let az_b = AvailabilityZone("az-b".to_string()); + + // 2 nodes: 1 az_a and 1 az_b. 
+ let nodes = test_utils::make_test_nodes(2, &[az_a.clone(), az_b.clone()]); + let mut scheduler = Scheduler::new(nodes.values()); + + let tenant_shard_id = TenantShardId { + tenant_id: TenantId::generate(), + shard_number: ShardNumber(0), + shard_count: ShardCount(1), + }; + let shard_identity = ShardIdentity::new( + tenant_shard_id.shard_number, + tenant_shard_id.shard_count, + pageserver_api::shard::ShardStripeSize(1), + ) + .unwrap(); + // 1 attached and 1 secondary. + let mut shard = TenantShard::new( + tenant_shard_id, + shard_identity, + pageserver_api::controller_api::PlacementPolicy::Attached(1), + Some(az_a.clone()), + ); + + let mut context = ScheduleContext::default(); + shard.schedule(&mut scheduler, &mut context).unwrap(); + eprintln!("Scheduled shard at {:?}", shard.intent); + + for node in scheduler.nodes.values() { + // Only 2 nodes, one tenant shard should be scheduled on each of them. + assert_eq!(node.shard_count, 1); + if node.az == az_a { + assert_eq!(node.home_shard_count, 1); + } else { + assert_eq!(node.home_shard_count, 0); + } + } + + shard.set_preferred_az(&mut scheduler, Some(az_b.clone())); + // Home AZ flipped. + for node in scheduler.nodes.values() { + assert_eq!(node.shard_count, 1); + if node.az == az_a { + assert_eq!(node.home_shard_count, 0); + } else { + assert_eq!(node.home_shard_count, 1); + } + } + + shard.set_preferred_az(&mut scheduler, None); + // No home AZ. 
+ for node in scheduler.nodes.values() { + assert_eq!(node.shard_count, 1); + assert_eq!(node.home_shard_count, 0); + } + + shard.intent.clear(&mut scheduler); + } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index d8c9ee70b1..2a68711977 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -6,7 +6,7 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, HashMap, HashSet}; use std::error::Error; use std::num::NonZeroU32; -use std::ops::Deref; +use std::ops::{Deref, DerefMut}; use std::path::PathBuf; use std::str::FromStr; use std::sync::Arc; @@ -8321,10 +8321,11 @@ impl Service { let mut updated_in_mem_and_db = Vec::default(); let mut locked = self.inner.write().unwrap(); + let state = locked.deref_mut(); for (tid, az_id) in updated { - let shard = locked.tenants.get_mut(&tid); + let shard = state.tenants.get_mut(&tid); if let Some(shard) = shard { - shard.set_preferred_az(az_id); + shard.set_preferred_az(&mut state.scheduler, az_id); updated_in_mem_and_db.push(tid); } } diff --git a/storage_controller/src/tenant_shard.rs b/storage_controller/src/tenant_shard.rs index 96ff70a951..80f42e04a9 100644 --- a/storage_controller/src/tenant_shard.rs +++ b/storage_controller/src/tenant_shard.rs @@ -328,6 +328,37 @@ impl IntentState { false } } + + pub(crate) fn set_preferred_az( + &mut self, + scheduler: &mut Scheduler, + preferred_az: Option, + ) { + let new_az = preferred_az.as_ref(); + let old_az = self.preferred_az_id.as_ref(); + + if old_az != new_az { + if let Some(node_id) = self.attached { + scheduler.update_node_ref_counts( + node_id, + new_az, + RefCountUpdate::ChangePreferredAzFrom(old_az), + ); + } + for node_id in &self.secondary { + scheduler.update_node_ref_counts( + *node_id, + new_az, + RefCountUpdate::ChangePreferredAzFrom(old_az), + ); + } + self.preferred_az_id = preferred_az; + } + } + + pub(crate) fn get_preferred_az(&self) -> Option<&AvailabilityZone> { + 
self.preferred_az_id.as_ref() + } } impl Drop for IntentState { @@ -1838,11 +1869,15 @@ impl TenantShard { } pub(crate) fn preferred_az(&self) -> Option<&AvailabilityZone> { - self.intent.preferred_az_id.as_ref() + self.intent.get_preferred_az() } - pub(crate) fn set_preferred_az(&mut self, preferred_az_id: Option) { - self.intent.preferred_az_id = preferred_az_id; + pub(crate) fn set_preferred_az( + &mut self, + scheduler: &mut Scheduler, + preferred_az_id: Option, + ) { + self.intent.set_preferred_az(scheduler, preferred_az_id); } /// Returns all the nodes to which this tenant shard is attached according to the From 5d38fd6c43c67a5988c81d09e8475944e6004847 Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Mon, 10 Mar 2025 14:55:16 +0200 Subject: [PATCH 131/207] fix(proxy): Use testodrome query id for latency measurement (#11150) Add a new neon option "neon_query_id" to glue data with testodrome queries. Log latency in microseconds always. Relates to the #22486 --- proxy/src/compute.rs | 3 ++- proxy/src/context/mod.rs | 29 +++++++++++++++++++++++++++++ proxy/src/metrics.rs | 7 +++++-- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index dfa6015b10..3852bfe348 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -287,9 +287,10 @@ impl ConnCfg { // TODO: lots of useful info but maybe we can move it elsewhere (eg traces?) info!( cold_start_info = ctx.cold_start_info().as_str(), - "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}", + "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}, query_id={}", self.0.get_ssl_mode(), ctx.get_proxy_latency(), + ctx.get_testodrome_id(), ); // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. 
diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index e10a04b4f1..4f72a86f30 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -63,6 +63,7 @@ struct RequestContextInner { success: bool, pub(crate) cold_start_info: ColdStartInfo, pg_options: Option, + testodrome_query_id: Option, // extra // This sender is here to keep the request monitoring channel open while requests are taking place. @@ -110,6 +111,7 @@ impl Clone for RequestContext { rejected: inner.rejected, cold_start_info: inner.cold_start_info, pg_options: inner.pg_options.clone(), + testodrome_query_id: inner.testodrome_query_id.clone(), sender: None, disconnect_sender: None, @@ -160,6 +162,7 @@ impl RequestContext { rejected: None, cold_start_info: ColdStartInfo::Unknown, pg_options: None, + testodrome_query_id: None, sender: LOG_CHAN.get().and_then(|tx| tx.upgrade()), disconnect_sender: LOG_CHAN_DISCONNECT.get().and_then(|tx| tx.upgrade()), @@ -210,6 +213,19 @@ impl RequestContext { this.set_dbname(dbname.into()); } + // Try to get testodrome_query_id directly from parameters + if let Some(options_str) = options.get("options") { + // If not found directly, try to extract it from the options string + for option in options_str.split_whitespace() { + if option.starts_with("neon_query_id:") { + if let Some(value) = option.strip_prefix("neon_query_id:") { + this.set_testodrome_id(value.to_string()); + break; + } + } + } + } + this.pg_options = Some(options); } @@ -355,6 +371,15 @@ impl RequestContext { .accumulated() } + pub(crate) fn get_testodrome_id(&self) -> String { + self.0 + .try_lock() + .expect("should not deadlock") + .testodrome_query_id + .clone() + .unwrap_or_default() + } + pub(crate) fn success(&self) { self.0 .try_lock() @@ -416,6 +441,10 @@ impl RequestContextInner { self.user = Some(user); } + fn set_testodrome_id(&mut self, query_id: String) { + self.testodrome_query_id = Some(query_id); + } + fn has_private_peer_addr(&self) -> bool { match 
self.conn_info.addr.ip() { IpAddr::V4(ip) => ip.is_private(), diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index b6a2a059ea..29834760c0 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -406,8 +406,11 @@ impl std::fmt::Display for LatencyAccumulated { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( f, - "client: {:?}, cplane: {:?}, compute: {:?}, retry: {:?}", - self.client, self.cplane, self.compute, self.retry + "client: {}, cplane: {}, compute: {}, retry: {}", + self.client.as_micros(), + self.cplane.as_micros(), + self.compute.as_micros(), + self.retry.as_micros() ) } } From 33c3c34c959020239a737522b80e79c277746908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Mon, 10 Mar 2025 14:24:14 +0100 Subject: [PATCH 132/207] Appease cargo deny errors (#11142) * pprof can also use `prost` as a backend, switch to it as `protobuf` has no update available but a security issue. * `paste` is a build time dependency, so add the unmaintained warning as an exception. 
--- Cargo.lock | 111 ++++++++++++++++++++------------ Cargo.toml | 2 +- deny.toml | 4 ++ libs/http-utils/src/endpoint.rs | 6 +- workspace_hack/Cargo.toml | 6 +- 5 files changed, 82 insertions(+), 47 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d3b09fa360..f15c6e857f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4062,7 +4062,7 @@ dependencies = [ "opentelemetry-http", "opentelemetry-proto", "opentelemetry_sdk", - "prost", + "prost 0.13.3", "reqwest", "thiserror 1.0.69", ] @@ -4075,7 +4075,7 @@ checksum = "a6e05acbfada5ec79023c85368af14abd0b307c015e9064d249b2a950ef459a6" dependencies = [ "opentelemetry", "opentelemetry_sdk", - "prost", + "prost 0.13.3", "tonic", ] @@ -4830,8 +4830,10 @@ dependencies = [ "nix 0.26.4", "once_cell", "parking_lot 0.12.1", - "protobuf", - "protobuf-codegen-pure", + "prost 0.12.6", + "prost-build 0.12.6", + "prost-derive 0.12.6", + "sha2", "smallvec", "symbolic-demangle", "tempfile", @@ -4850,7 +4852,7 @@ dependencies = [ "inferno 0.12.0", "num", "paste", - "prost", + "prost 0.13.3", ] [[package]] @@ -4943,6 +4945,16 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "prost" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" +dependencies = [ + "bytes", + "prost-derive 0.12.6", +] + [[package]] name = "prost" version = "0.13.3" @@ -4950,7 +4962,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.13.3", +] + +[[package]] +name = "prost-build" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" +dependencies = [ + "bytes", + "heck", + "itertools 0.10.5", + "log", + "multimap", + "once_cell", + "petgraph", + "prettyplease", + "prost 0.12.6", 
+ "prost-types 0.12.6", + "regex", + "syn 2.0.90", + "tempfile", ] [[package]] @@ -4967,13 +5000,26 @@ dependencies = [ "once_cell", "petgraph", "prettyplease", - "prost", - "prost-types", + "prost 0.13.3", + "prost-types 0.13.3", "regex", "syn 2.0.90", "tempfile", ] +[[package]] +name = "prost-derive" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" +dependencies = [ + "anyhow", + "itertools 0.10.5", + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "prost-derive" version = "0.13.3" @@ -4987,38 +5033,22 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "prost-types" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost 0.12.6", +] + [[package]] name = "prost-types" version = "0.13.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" dependencies = [ - "prost", -] - -[[package]] -name = "protobuf" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" - -[[package]] -name = "protobuf-codegen" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "033460afb75cf755fcfc16dfaed20b86468082a2ea24e05ac35ab4a099a017d6" -dependencies = [ - "protobuf", -] - -[[package]] -name = "protobuf-codegen-pure" -version = "2.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a29399fc94bcd3eeaa951c715f7bea69409b2445356b00519740bcd6ddd865" -dependencies = [ - "protobuf", - "protobuf-codegen", + "prost 0.13.3", ] [[package]] @@ -6566,7 +6596,7 @@ dependencies = [ "metrics", "once_cell", "parking_lot 0.12.1", - "prost", + "prost 0.13.3", 
"rustls 0.23.18", "tokio", "tonic", @@ -7338,7 +7368,7 @@ dependencies = [ "hyper-util", "percent-encoding", "pin-project", - "prost", + "prost 0.13.3", "rustls-native-certs 0.8.0", "rustls-pemfile 2.1.1", "tokio", @@ -7358,8 +7388,8 @@ checksum = "9557ce109ea773b399c9b9e5dca39294110b74f1f342cb347a80d1fce8c26a11" dependencies = [ "prettyplease", "proc-macro2", - "prost-build", - "prost-types", + "prost-build 0.13.3", + "prost-types 0.13.3", "quote", "syn 2.0.90", ] @@ -7892,7 +7922,7 @@ dependencies = [ "pageserver_api", "postgres_ffi", "pprof", - "prost", + "prost 0.13.3", "remote_storage", "serde", "serde_json", @@ -8349,6 +8379,7 @@ dependencies = [ "hyper-util", "indexmap 1.9.3", "indexmap 2.0.1", + "itertools 0.10.5", "itertools 0.12.1", "lazy_static", "libc", @@ -8367,7 +8398,7 @@ dependencies = [ "parquet", "prettyplease", "proc-macro2", - "prost", + "prost 0.13.3", "quote", "rand 0.8.5", "regex", diff --git a/Cargo.toml b/Cargo.toml index d11fe4f449..427d1e98ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -139,7 +139,7 @@ parquet = { version = "53", default-features = false, features = ["zstd"] } parquet_derive = "53" pbkdf2 = { version = "0.12.1", features = ["simple", "std"] } pin-project-lite = "0.2" -pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "protobuf", "protobuf-codec"] } +pprof = { version = "0.14", features = ["criterion", "flamegraph", "frame-pointer", "prost-codec"] } procfs = "0.16" prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prost = "0.13" diff --git a/deny.toml b/deny.toml index b551405568..ed7aa9ef9f 100644 --- a/deny.toml +++ b/deny.toml @@ -27,6 +27,10 @@ yanked = "warn" id = "RUSTSEC-2023-0071" reason = "the marvin attack only affects private key decryption, not public key signature verification" +[[advisories.ignore]] +id = "RUSTSEC-2024-0436" +reason = "The paste crate is a build-only dependency with no runtime components. 
It is unlikely to have any security impact." + # This section is considered when running `cargo deny check licenses` # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html diff --git a/libs/http-utils/src/endpoint.rs b/libs/http-utils/src/endpoint.rs index f4f93df62f..5588f6d87e 100644 --- a/libs/http-utils/src/endpoint.rs +++ b/libs/http-utils/src/endpoint.rs @@ -399,12 +399,10 @@ pub async fn profile_cpu_handler(req: Request) -> Result, A // Return the report in the requested format. match format { Format::Pprof => { - let mut body = Vec::new(); - report + let body = report .pprof() .map_err(|err| ApiError::InternalServerError(err.into()))? - .write_to_vec(&mut body) - .map_err(|err| ApiError::InternalServerError(err.into()))?; + .encode_to_vec(); Response::builder() .status(200) diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 183cc66ab9..4a6ab6e745 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -47,7 +47,8 @@ hyper-dff4ba8e3ae991db = { package = "hyper", version = "1", features = ["full"] hyper-util = { version = "0.1", features = ["client-legacy", "http1", "http2", "server", "service"] } indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools = { version = "0.12" } +itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12" } +itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" } lazy_static = { version = "1", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -114,7 +115,8 @@ half = { version = "2", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", features = 
["raw"] } indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools = { version = "0.12" } +itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12" } +itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" } libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } From f17931870f6c56af41bd940f89c0d1383fe4b836 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Mon, 10 Mar 2025 14:27:23 +0100 Subject: [PATCH 133/207] fix(ci): use syntax for pinging groups on slack (#11135) ## Problem Pinging groups on slack didn't work, because I didn't use the correct syntax. ## Summary of changes Use the correct syntax for pinging groups. --- .github/actionlint.yml | 2 ++ .github/workflows/build_and_test.yml | 2 +- .github/workflows/cargo-deny.yml | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 667ff7f92e..39a30d9a39 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -33,3 +33,5 @@ config-variables: - NEON_PROD_AWS_ACCOUNT_ID - AWS_ECR_REGION - BENCHMARK_LARGE_OLTP_PROJECTID + - SLACK_ON_CALL_DEVPROD_STREAM + - SLACK_RUST_CHANNEL_ID diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 66758ca49f..197b83fac4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1231,7 +1231,7 @@ jobs: payload: | channel: ${{ vars.SLACK_STORAGE_CHANNEL_ID }} text: | - 🔴 @oncall-storage: deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>. 
+ 🔴 : deploy job on release branch had unexpected status "${{ needs.deploy.result }}" <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run>. # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory promote-compatibility-data: diff --git a/.github/workflows/cargo-deny.yml b/.github/workflows/cargo-deny.yml index 433b377c32..222f7e9787 100644 --- a/.github/workflows/cargo-deny.yml +++ b/.github/workflows/cargo-deny.yml @@ -7,7 +7,7 @@ on: required: false type: string schedule: - - cron: '0 0 * * *' + - cron: '0 10 * * *' jobs: cargo-deny: @@ -50,8 +50,9 @@ jobs: method: chat.postMessage token: ${{ secrets.SLACK_BOT_TOKEN }} payload: | - channel: ${{ vars.SLACK_CICD_CHANNEL_ID }} + channel: ${{ vars.SLACK_ON_CALL_DEVPROD_STREAM }} text: | Periodic cargo-deny on ${{ matrix.ref }}: ${{ job.status }} <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|GitHub Run> - Pinging @oncall-devprod. + Fixing the problem should be fairly straight forward from the logs. If not, <#${{ vars.SLACK_RUST_CHANNEL_ID }}> is there to help. + Pinging . From 63b22d3fb107498a838f0d345963471b00ec620c Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Mon, 10 Mar 2025 19:07:59 +0400 Subject: [PATCH 134/207] pageserver: https for management API (#11025) ## Problem Storage controller uses unencrypted HTTP requests for pageserver management API. Closes: https://github.com/neondatabase/cloud/issues/24283 ## Summary of changes - Implement `http_utils::server::Server` with TLS support. - Replace `hyper0::server::Server` with `http_utils::server::Server` in pageserver. - Add HTTPS handler for pageserver management API. - Generate local SSL certificates in neon local. 
--- Cargo.lock | 11 +- Cargo.toml | 1 + control_plane/src/bin/neon_local.rs | 2 + control_plane/src/local_env.rs | 142 ++++++++++++++++ control_plane/src/pageserver.rs | 41 ++++- control_plane/src/storage_controller.rs | 8 + control_plane/storcon_cli/src/main.rs | 15 +- libs/http-utils/Cargo.toml | 2 + libs/http-utils/src/lib.rs | 3 +- libs/http-utils/src/server.rs | 155 ++++++++++++++++++ libs/pageserver_api/src/config.rs | 10 ++ libs/pageserver_api/src/config/tests.rs | 24 +++ pageserver/Cargo.toml | 4 + pageserver/client/src/mgmt_api.rs | 28 +++- pageserver/pagebench/Cargo.toml | 1 + pageserver/pagebench/src/cmd/aux_files.rs | 3 +- pageserver/pagebench/src/cmd/basebackup.rs | 3 +- .../pagebench/src/cmd/getpage_latest_lsn.rs | 3 +- .../src/cmd/ondemand_download_churn.rs | 3 +- .../cmd/trigger_initial_size_calculation.rs | 3 +- pageserver/src/bin/pageserver.rs | 90 ++++++++-- pageserver/src/config.rs | 11 ++ pageserver/src/controller_upcall_client.rs | 2 +- pageserver/src/lib.rs | 11 ++ storage_controller/src/heartbeater.rs | 2 + storage_controller/src/http.rs | 4 +- storage_controller/src/node.rs | 23 ++- storage_controller/src/pageserver_client.rs | 15 +- storage_controller/src/reconciler.rs | 10 +- storage_controller/src/service.rs | 78 +++++++-- test_runner/fixtures/neon_fixtures.py | 24 ++- test_runner/regress/test_ssl.py | 15 ++ 32 files changed, 679 insertions(+), 68 deletions(-) create mode 100644 libs/http-utils/src/server.rs create mode 100644 test_runner/regress/test_ssl.py diff --git a/Cargo.lock b/Cargo.lock index f15c6e857f..fa40009769 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2848,6 +2848,7 @@ dependencies = [ "anyhow", "bytes", "fail", + "futures", "hyper 0.14.30", "itertools 0.10.5", "jemalloc_pprof", @@ -2861,6 +2862,7 @@ dependencies = [ "serde_path_to_error", "thiserror 1.0.69", "tokio", + "tokio-rustls 0.26.0", "tokio-stream", "tokio-util", "tracing", @@ -4189,6 +4191,7 @@ dependencies = [ "pageserver_api", "pageserver_client", "rand 
0.8.5", + "reqwest", "serde", "serde_json", "tokio", @@ -4278,6 +4281,9 @@ dependencies = [ "remote_storage", "reqwest", "rpds", + "rustls 0.23.18", + "rustls-pemfile 2.1.1", + "rustls-pki-types", "scopeguard", "send-future", "serde", @@ -4296,6 +4302,7 @@ dependencies = [ "tokio-epoll-uring", "tokio-io-timeout", "tokio-postgres", + "tokio-rustls 0.26.0", "tokio-stream", "tokio-tar", "tokio-util", @@ -5908,9 +5915,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" +checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c" [[package]] name = "rustls-webpki" diff --git a/Cargo.toml b/Cargo.toml index 427d1e98ad..c59c4c5435 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -155,6 +155,7 @@ rpds = "0.13" rustc-hash = "1.1.0" rustls = { version = "0.23.16", default-features = false } rustls-pemfile = "2" +rustls-pki-types = "1.11" scopeguard = "1.1" sysinfo = "0.29.2" sd-notify = "0.4.1" diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 375b5d87d0..ba1411b615 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -963,6 +963,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result { id: pageserver_id, listen_pg_addr: format!("127.0.0.1:{pg_port}"), listen_http_addr: format!("127.0.0.1:{http_port}"), + listen_https_addr: None, pg_auth_type: AuthType::Trust, http_auth_type: AuthType::Trust, other: Default::default(), @@ -977,6 +978,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result { default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)), storage_controller: None, control_plane_compute_hook_api: None, + generate_local_ssl_certs: false, } }; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index f4026efbbf..2e8fb8f07b 100644 --- 
a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -81,6 +81,10 @@ pub struct LocalEnv { // but deserialization into a generic toml object as `toml::Value::try_from` fails with an error. // https://toml.io/en/v1.0.0 does not contain a concept of "a table inside another table". pub branch_name_mappings: HashMap>, + + /// Flag to generate SSL certificates for components that need it. + /// Also generates root CA certificate that is used to sign all other certificates. + pub generate_local_ssl_certs: bool, } /// On-disk state stored in `.neon/config`. @@ -102,6 +106,10 @@ pub struct OnDiskConfig { pub control_plane_api: Option, pub control_plane_compute_hook_api: Option, branch_name_mappings: HashMap>, + // Note: skip serializing because in compat tests old storage controller fails + // to load new config file. May be removed after this field is in release branch. + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub generate_local_ssl_certs: bool, } fn fail_if_pageservers_field_specified<'de, D>(_: D) -> Result, D::Error> @@ -129,6 +137,7 @@ pub struct NeonLocalInitConf { pub safekeepers: Vec, pub control_plane_api: Option, pub control_plane_compute_hook_api: Option>, + pub generate_local_ssl_certs: bool, } /// Broker config for cluster internal communication. 
@@ -165,6 +174,9 @@ pub struct NeonStorageControllerConf { #[serde(with = "humantime_serde")] pub long_reconcile_threshold: Option, + + #[serde(default)] + pub use_https_pageserver_api: bool, } impl NeonStorageControllerConf { @@ -188,6 +200,7 @@ impl Default for NeonStorageControllerConf { max_secondary_lag_bytes: None, heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL, long_reconcile_threshold: None, + use_https_pageserver_api: false, } } } @@ -217,6 +230,7 @@ pub struct PageServerConf { pub id: NodeId, pub listen_pg_addr: String, pub listen_http_addr: String, + pub listen_https_addr: Option, pub pg_auth_type: AuthType, pub http_auth_type: AuthType, pub no_sync: bool, @@ -228,6 +242,7 @@ impl Default for PageServerConf { id: NodeId(0), listen_pg_addr: String::new(), listen_http_addr: String::new(), + listen_https_addr: None, pg_auth_type: AuthType::Trust, http_auth_type: AuthType::Trust, no_sync: false, @@ -243,6 +258,7 @@ pub struct NeonLocalInitPageserverConf { pub id: NodeId, pub listen_pg_addr: String, pub listen_http_addr: String, + pub listen_https_addr: Option, pub pg_auth_type: AuthType, pub http_auth_type: AuthType, #[serde(default, skip_serializing_if = "std::ops::Not::not")] @@ -257,6 +273,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf { id, listen_pg_addr, listen_http_addr, + listen_https_addr, pg_auth_type, http_auth_type, no_sync, @@ -266,6 +283,7 @@ impl From<&NeonLocalInitPageserverConf> for PageServerConf { id: *id, listen_pg_addr: listen_pg_addr.clone(), listen_http_addr: listen_http_addr.clone(), + listen_https_addr: listen_https_addr.clone(), pg_auth_type: *pg_auth_type, http_auth_type: *http_auth_type, no_sync: *no_sync, @@ -410,6 +428,41 @@ impl LocalEnv { } } + pub fn ssl_ca_cert_path(&self) -> Option { + if self.generate_local_ssl_certs { + Some(self.base_data_dir.join("rootCA.crt")) + } else { + None + } + } + + pub fn ssl_ca_key_path(&self) -> Option { + if self.generate_local_ssl_certs { + 
Some(self.base_data_dir.join("rootCA.key")) + } else { + None + } + } + + pub fn generate_ssl_ca_cert(&self) -> anyhow::Result<()> { + let cert_path = self.ssl_ca_cert_path().unwrap(); + let key_path = self.ssl_ca_key_path().unwrap(); + if !fs::exists(cert_path.as_path())? { + generate_ssl_ca_cert(cert_path.as_path(), key_path.as_path())?; + } + Ok(()) + } + + pub fn generate_ssl_cert(&self, cert_path: &Path, key_path: &Path) -> anyhow::Result<()> { + self.generate_ssl_ca_cert()?; + generate_ssl_cert( + cert_path, + key_path, + self.ssl_ca_cert_path().unwrap().as_path(), + self.ssl_ca_key_path().unwrap().as_path(), + ) + } + /// Inspect the base data directory and extract the instance id and instance directory path /// for all storage controller instances pub async fn storage_controller_instances(&self) -> std::io::Result> { @@ -519,6 +572,7 @@ impl LocalEnv { control_plane_api, control_plane_compute_hook_api, branch_name_mappings, + generate_local_ssl_certs, } = on_disk_config; LocalEnv { base_data_dir: repopath.to_owned(), @@ -533,6 +587,7 @@ impl LocalEnv { control_plane_api: control_plane_api.unwrap(), control_plane_compute_hook_api, branch_name_mappings, + generate_local_ssl_certs, } }; @@ -568,6 +623,7 @@ impl LocalEnv { struct PageserverConfigTomlSubset { listen_pg_addr: String, listen_http_addr: String, + listen_https_addr: Option, pg_auth_type: AuthType, http_auth_type: AuthType, #[serde(default)] @@ -592,6 +648,7 @@ impl LocalEnv { let PageserverConfigTomlSubset { listen_pg_addr, listen_http_addr, + listen_https_addr, pg_auth_type, http_auth_type, no_sync, @@ -609,6 +666,7 @@ impl LocalEnv { }, listen_pg_addr, listen_http_addr, + listen_https_addr, pg_auth_type, http_auth_type, no_sync, @@ -636,6 +694,7 @@ impl LocalEnv { control_plane_api: Some(self.control_plane_api.clone()), control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(), branch_name_mappings: self.branch_name_mappings.clone(), + generate_local_ssl_certs: 
self.generate_local_ssl_certs, }, ) } @@ -718,6 +777,7 @@ impl LocalEnv { safekeepers, control_plane_api, control_plane_compute_hook_api, + generate_local_ssl_certs, } = conf; // Find postgres binaries. @@ -766,8 +826,13 @@ impl LocalEnv { control_plane_api: control_plane_api.unwrap(), control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(), branch_name_mappings: Default::default(), + generate_local_ssl_certs, }; + if generate_local_ssl_certs { + env.generate_ssl_ca_cert()?; + } + // create endpoints dir fs::create_dir_all(env.endpoints_path())?; @@ -851,3 +916,80 @@ fn generate_auth_keys(private_key_path: &Path, public_key_path: &Path) -> anyhow } Ok(()) } + +fn generate_ssl_ca_cert(cert_path: &Path, key_path: &Path) -> anyhow::Result<()> { + // openssl req -x509 -newkey rsa:2048 -nodes -subj "/CN=Neon Local CA" -days 36500 \ + // -out rootCA.crt -keyout rootCA.key + let keygen_output = Command::new("openssl") + .args([ + "req", "-x509", "-newkey", "rsa:2048", "-nodes", "-days", "36500", + ]) + .args(["-subj", "/CN=Neon Local CA"]) + .args(["-out", cert_path.to_str().unwrap()]) + .args(["-keyout", key_path.to_str().unwrap()]) + .output() + .context("failed to generate CA certificate")?; + if !keygen_output.status.success() { + bail!( + "openssl failed: '{}'", + String::from_utf8_lossy(&keygen_output.stderr) + ); + } + Ok(()) +} + +fn generate_ssl_cert( + cert_path: &Path, + key_path: &Path, + ca_cert_path: &Path, + ca_key_path: &Path, +) -> anyhow::Result<()> { + // Generate Certificate Signing Request (CSR). 
+ let mut csr_path = cert_path.to_path_buf(); + csr_path.set_extension(".csr"); + + // openssl req -new -nodes -newkey rsa:2048 -keyout server.key -out server.csr \ + // -subj "/CN=localhost" -addext "subjectAltName=DNS:localhost,IP:127.0.0.1" + let keygen_output = Command::new("openssl") + .args(["req", "-new", "-nodes"]) + .args(["-newkey", "rsa:2048"]) + .args(["-subj", "/CN=localhost"]) + .args(["-addext", "subjectAltName=DNS:localhost,IP:127.0.0.1"]) + .args(["-keyout", key_path.to_str().unwrap()]) + .args(["-out", csr_path.to_str().unwrap()]) + .output() + .context("failed to generate CSR")?; + if !keygen_output.status.success() { + bail!( + "openssl failed: '{}'", + String::from_utf8_lossy(&keygen_output.stderr) + ); + } + + // Sign CSR with CA key. + // + // openssl x509 -req -in server.csr -CA rootCA.crt -CAkey rootCA.key -CAcreateserial \ + // -out server.crt -days 36500 -copy_extensions copyall + let keygen_output = Command::new("openssl") + .args(["x509", "-req"]) + .args(["-in", csr_path.to_str().unwrap()]) + .args(["-CA", ca_cert_path.to_str().unwrap()]) + .args(["-CAkey", ca_key_path.to_str().unwrap()]) + .arg("-CAcreateserial") + .args(["-out", cert_path.to_str().unwrap()]) + .args(["-days", "36500"]) + .args(["-copy_extensions", "copyall"]) + .output() + .context("failed to sign CSR")?; + if !keygen_output.status.success() { + bail!( + "openssl failed: '{}'", + String::from_utf8_lossy(&keygen_output.stderr) + ); + } + + // Remove CSR file as it's not needed anymore. 
+ fs::remove_file(csr_path)?; + + Ok(()) +} diff --git a/control_plane/src/pageserver.rs b/control_plane/src/pageserver.rs index 39656bdbbe..eeaad10d26 100644 --- a/control_plane/src/pageserver.rs +++ b/control_plane/src/pageserver.rs @@ -21,6 +21,7 @@ use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; use postgres_backend::AuthType; use postgres_connection::{PgConnectionConfig, parse_host_port}; +use reqwest::Certificate; use utils::auth::{Claims, Scope}; use utils::id::{NodeId, TenantId, TimelineId}; use utils::lsn::Lsn; @@ -49,12 +50,29 @@ impl PageServerNode { let (host, port) = parse_host_port(&conf.listen_pg_addr).expect("Unable to parse listen_pg_addr"); let port = port.unwrap_or(5432); + + let ssl_ca_cert = env.ssl_ca_cert_path().map(|ssl_ca_file| { + let buf = std::fs::read(ssl_ca_file).expect("SSL root CA file should exist"); + Certificate::from_pem(&buf).expect("CA certificate should be valid") + }); + + let endpoint = if env.storage_controller.use_https_pageserver_api { + format!( + "https://{}", + conf.listen_https_addr.as_ref().expect( + "listen https address should be specified if use_https_pageserver_api is on" + ) + ) + } else { + format!("http://{}", conf.listen_http_addr) + }; + Self { pg_connection_config: PgConnectionConfig::new_host_port(host, port), conf: conf.clone(), env: env.clone(), http_client: mgmt_api::Client::new( - format!("http://{}", conf.listen_http_addr), + endpoint, { match conf.http_auth_type { AuthType::Trust => None, @@ -65,7 +83,9 @@ impl PageServerNode { } } .as_deref(), - ), + ssl_ca_cert, + ) + .expect("Client constructs with no errors"), } } @@ -220,6 +240,13 @@ impl PageServerNode { .context("write identity toml")?; drop(identity_toml); + if self.env.generate_local_ssl_certs { + self.env.generate_ssl_cert( + datadir.join("server.crt").as_path(), + datadir.join("server.key").as_path(), + )?; + } + // TODO: invoke a TBD config-check command to validate that pageserver will start with the written 
config // Write metadata file, used by pageserver on startup to register itself with @@ -230,6 +257,15 @@ impl PageServerNode { parse_host_port(&self.conf.listen_http_addr).expect("Unable to parse listen_http_addr"); let http_port = http_port.unwrap_or(9898); + let https_port = match self.conf.listen_https_addr.as_ref() { + Some(https_addr) => { + let (_https_host, https_port) = + parse_host_port(https_addr).expect("Unable to parse listen_https_addr"); + Some(https_port.unwrap_or(9899)) + } + None => None, + }; + // Intentionally hand-craft JSON: this acts as an implicit format compat test // in case the pageserver-side structure is edited, and reflects the real life // situation: the metadata is written by some other script. @@ -240,6 +276,7 @@ impl PageServerNode { postgres_port: self.pg_connection_config.port(), http_host: "localhost".to_string(), http_port, + https_port, other: HashMap::from([( "availability_zone_id".to_string(), serde_json::json!(az_id), diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 3604e4a241..1df50e211c 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -534,6 +534,14 @@ impl StorageController { args.push("--start-as-candidate".to_string()); } + if self.config.use_https_pageserver_api { + args.push("--use-https-pageserver-api".to_string()); + } + + if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() { + args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap())); + } + if let Some(private_key) = &self.private_key { let claims = Claims::new(None, Scope::PageServerApi); let jwt_token = diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index c3f157a9cc..b5c4f21e97 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -1,4 +1,5 @@ use std::collections::{HashMap, HashSet}; +use std::path::PathBuf; use std::str::FromStr; use std::time::Duration; 
@@ -278,6 +279,10 @@ struct Cli { /// a token with both scopes to use with this tool. jwt: Option, + #[arg(long)] + /// Trusted root CA certificate to use in https APIs. + ssl_ca_file: Option, + #[command(subcommand)] command: Command, } @@ -388,9 +393,17 @@ async fn main() -> anyhow::Result<()> { let storcon_client = Client::new(cli.api.clone(), cli.jwt.clone()); + let ssl_ca_cert = match &cli.ssl_ca_file { + Some(ssl_ca_file) => { + let buf = tokio::fs::read(ssl_ca_file).await?; + Some(reqwest::Certificate::from_pem(&buf)?) + } + None => None, + }; + let mut trimmed = cli.api.to_string(); trimmed.pop(); - let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref()); + let vps_client = mgmt_api::Client::new(trimmed, cli.jwt.as_deref(), ssl_ca_cert)?; match cli.command { Command::NodeRegister { diff --git a/libs/http-utils/Cargo.toml b/libs/http-utils/Cargo.toml index d16dac7876..00b3777a63 100644 --- a/libs/http-utils/Cargo.toml +++ b/libs/http-utils/Cargo.toml @@ -8,6 +8,7 @@ license.workspace = true anyhow.workspace = true bytes.workspace = true fail.workspace = true +futures.workspace = true hyper0.workspace = true itertools.workspace = true jemalloc_pprof.workspace = true @@ -21,6 +22,7 @@ serde_path_to_error.workspace = true thiserror.workspace = true tracing.workspace = true tokio.workspace = true +tokio-rustls.workspace = true tokio-util.workspace = true url.workspace = true uuid.workspace = true diff --git a/libs/http-utils/src/lib.rs b/libs/http-utils/src/lib.rs index 1e9b3c761a..dd520ef69b 100644 --- a/libs/http-utils/src/lib.rs +++ b/libs/http-utils/src/lib.rs @@ -3,9 +3,10 @@ pub mod error; pub mod failpoints; pub mod json; pub mod request; +pub mod server; extern crate hyper0 as hyper; /// Current fast way to apply simple http routing in various Neon binaries. /// Re-exported for sake of uniform approach, that could be later replaced with better alternatives, if needed. 
-pub use routerify::{RouterBuilder, RouterService, ext::RequestExt}; +pub use routerify::{RequestServiceBuilder, RouterBuilder, RouterService, ext::RequestExt}; diff --git a/libs/http-utils/src/server.rs b/libs/http-utils/src/server.rs new file mode 100644 index 0000000000..33e4915e99 --- /dev/null +++ b/libs/http-utils/src/server.rs @@ -0,0 +1,155 @@ +use std::{error::Error, sync::Arc}; + +use futures::StreamExt; +use futures::stream::FuturesUnordered; +use hyper0::Body; +use hyper0::server::conn::Http; +use routerify::{RequestService, RequestServiceBuilder}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tokio_rustls::TlsAcceptor; +use tokio_util::sync::CancellationToken; +use tracing::{error, info}; + +use crate::error::ApiError; + +/// A simple HTTP server over hyper library. +/// You may want to use it instead of [`hyper0::server::Server`] because: +/// 1. hyper0's Server was removed from hyper v1. +/// It's recommended to replace hyepr0's Server with a manual loop, which is done here. +/// 2. hyper0's Server doesn't support TLS out of the box, and there is no way +/// to support it efficiently with the Accept trait that hyper0's Server uses. +/// That's one of the reasons why it was removed from v1. +/// +pub struct Server { + request_service: Arc>, + listener: tokio::net::TcpListener, + tls_acceptor: Option, +} + +impl Server { + pub fn new( + request_service: Arc>, + listener: std::net::TcpListener, + tls_acceptor: Option, + ) -> anyhow::Result { + // Note: caller of from_std is responsible for setting nonblocking mode. 
+ listener.set_nonblocking(true)?; + let listener = tokio::net::TcpListener::from_std(listener)?; + + Ok(Self { + request_service, + listener, + tls_acceptor, + }) + } + + pub async fn serve(self, cancel: CancellationToken) -> anyhow::Result<()> { + fn suppress_io_error(err: &std::io::Error) -> bool { + use std::io::ErrorKind::*; + matches!(err.kind(), ConnectionReset | ConnectionAborted | BrokenPipe) + } + fn suppress_hyper_error(err: &hyper0::Error) -> bool { + if err.is_incomplete_message() || err.is_closed() || err.is_timeout() { + return true; + } + if let Some(inner) = err.source() { + if let Some(io) = inner.downcast_ref::() { + return suppress_io_error(io); + } + } + false + } + + let mut connections = FuturesUnordered::new(); + loop { + tokio::select! { + stream = self.listener.accept() => { + let (tcp_stream, remote_addr) = match stream { + Ok(stream) => stream, + Err(err) => { + if !suppress_io_error(&err) { + info!("Failed to accept TCP connection: {err:#}"); + } + continue; + } + }; + + let service = self.request_service.build(remote_addr); + let tls_acceptor = self.tls_acceptor.clone(); + let cancel = cancel.clone(); + + connections.push(tokio::spawn( + async move { + match tls_acceptor { + Some(tls_acceptor) => { + // Handle HTTPS connection. + let tls_stream = tokio::select! { + tls_stream = tls_acceptor.accept(tcp_stream) => tls_stream, + _ = cancel.cancelled() => return, + }; + let tls_stream = match tls_stream { + Ok(tls_stream) => tls_stream, + Err(err) => { + if !suppress_io_error(&err) { + info!("Failed to accept TLS connection: {err:#}"); + } + return; + } + }; + if let Err(err) = Self::serve_connection(tls_stream, service, cancel).await { + if !suppress_hyper_error(&err) { + info!("Failed to serve HTTPS connection: {err:#}"); + } + } + } + None => { + // Handle HTTP connection. 
+ if let Err(err) = Self::serve_connection(tcp_stream, service, cancel).await { + if !suppress_hyper_error(&err) { + info!("Failed to serve HTTP connection: {err:#}"); + } + } + } + }; + })); + } + Some(conn) = connections.next() => { + if let Err(err) = conn { + error!("Connection panicked: {err:#}"); + } + } + _ = cancel.cancelled() => { + // Wait for graceful shutdown of all connections. + while let Some(conn) = connections.next().await { + if let Err(err) = conn { + error!("Connection panicked: {err:#}"); + } + } + break; + } + } + } + Ok(()) + } + + /// Serves HTTP connection with graceful shutdown. + async fn serve_connection( + io: I, + service: RequestService, + cancel: CancellationToken, + ) -> Result<(), hyper0::Error> + where + I: AsyncRead + AsyncWrite + Unpin + Send + 'static, + { + let mut conn = Http::new().serve_connection(io, service).with_upgrades(); + + tokio::select! { + res = &mut conn => res, + _ = cancel.cancelled() => { + Pin::new(&mut conn).graceful_shutdown(); + // Note: connection should still be awaited for graceful shutdown to complete. + conn.await + } + } + } +} diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index f387ff0579..ce7de1e0c7 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -35,6 +35,7 @@ pub struct NodeMetadata { pub postgres_port: u16, pub http_host: String, pub http_port: u16, + pub https_port: Option, // Deployment tools may write fields to the metadata file beyond what we // use in this type: this type intentionally only names fields that require. 
@@ -57,6 +58,9 @@ pub struct ConfigToml { // types mapped 1:1 into the runtime PageServerConfig type pub listen_pg_addr: String, pub listen_http_addr: String, + pub listen_https_addr: Option, + pub ssl_key_file: Utf8PathBuf, + pub ssl_cert_file: Utf8PathBuf, pub availability_zone: Option, #[serde(with = "humantime_serde")] pub wait_lsn_timeout: Duration, @@ -421,6 +425,9 @@ pub mod defaults { pub const DEFAULT_WAL_RECEIVER_PROTOCOL: utils::postgres_client::PostgresClientProtocol = utils::postgres_client::PostgresClientProtocol::Vanilla; + + pub const DEFAULT_SSL_KEY_FILE: &str = "server.key"; + pub const DEFAULT_SSL_CERT_FILE: &str = "server.crt"; } impl Default for ConfigToml { @@ -430,6 +437,9 @@ impl Default for ConfigToml { Self { listen_pg_addr: (DEFAULT_PG_LISTEN_ADDR.to_string()), listen_http_addr: (DEFAULT_HTTP_LISTEN_ADDR.to_string()), + listen_https_addr: (None), + ssl_key_file: Utf8PathBuf::from(DEFAULT_SSL_KEY_FILE), + ssl_cert_file: Utf8PathBuf::from(DEFAULT_SSL_CERT_FILE), availability_zone: (None), wait_lsn_timeout: (humantime::parse_duration(DEFAULT_WAIT_LSN_TIMEOUT) .expect("cannot parse default wait lsn timeout")), diff --git a/libs/pageserver_api/src/config/tests.rs b/libs/pageserver_api/src/config/tests.rs index edeefc156e..9e61873273 100644 --- a/libs/pageserver_api/src/config/tests.rs +++ b/libs/pageserver_api/src/config/tests.rs @@ -16,6 +16,30 @@ fn test_node_metadata_v1_backward_compatibilty() { postgres_port: 23, http_host: "localhost".to_string(), http_port: 42, + https_port: None, + other: HashMap::new(), + } + ) +} + +#[test] +fn test_node_metadata_v2_backward_compatibilty() { + let v2 = serde_json::to_vec(&serde_json::json!({ + "host": "localhost", + "port": 23, + "http_host": "localhost", + "http_port": 42, + "https_port": 123, + })); + + assert_eq!( + serde_json::from_slice::(&v2.unwrap()).unwrap(), + NodeMetadata { + postgres_host: "localhost".to_string(), + postgres_port: 23, + http_host: "localhost".to_string(), + http_port: 42, + 
https_port: Some(123), other: HashMap::new(), } ) diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index fa16090170..40ca1d3a33 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -48,6 +48,9 @@ pprof.workspace = true rand.workspace = true range-set-blaze = { version = "0.1.16", features = ["alloc"] } regex.workspace = true +rustls-pemfile.workspace = true +rustls-pki-types.workspace = true +rustls.workspace = true scopeguard.workspace = true send-future.workspace = true serde.workspace = true @@ -62,6 +65,7 @@ tokio = { workspace = true, features = ["process", "sync", "fs", "rt", "io-util" tokio-epoll-uring.workspace = true tokio-io-timeout.workspace = true tokio-postgres.workspace = true +tokio-rustls.workspace = true tokio-stream.workspace = true tokio-util.workspace = true toml_edit = { workspace = true, features = [ "serde" ] } diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index 37c914c4e9..830fd8a531 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody; use pageserver_api::models::*; use pageserver_api::shard::TenantShardId; pub use reqwest::Body as ReqwestBody; -use reqwest::{IntoUrl, Method, StatusCode}; +use reqwest::{Certificate, IntoUrl, Method, StatusCode}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; @@ -38,6 +38,9 @@ pub enum Error { #[error("Cancelled")] Cancelled, + + #[error("create client: {0}{}", .0.source().map(|e| format!(": {e}")).unwrap_or_default())] + CreateClient(reqwest::Error), } pub type Result = std::result::Result; @@ -69,8 +72,17 @@ pub enum ForceAwaitLogicalSize { } impl Client { - pub fn new(mgmt_api_endpoint: String, jwt: Option<&str>) -> Self { - Self::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt) + pub fn new( + mgmt_api_endpoint: String, + jwt: Option<&str>, + ssl_ca_cert: Option, + ) -> Result { + let mut http_client = reqwest::Client::builder(); + if 
let Some(ssl_ca_cert) = ssl_ca_cert { + http_client = http_client.add_root_certificate(ssl_ca_cert); + } + let http_client = http_client.build().map_err(Error::CreateClient)?; + Ok(Self::from_client(http_client, mgmt_api_endpoint, jwt)) } pub fn from_client( @@ -101,12 +113,10 @@ impl Client { debug_assert!(path.starts_with('/')); let uri = format!("{}{}", self.mgmt_api_endpoint, path); - let req = self.client.request(Method::GET, uri); - let req = if let Some(value) = &self.authorization_header { - req.header(reqwest::header::AUTHORIZATION, value) - } else { - req - }; + let mut req = self.client.request(Method::GET, uri); + if let Some(value) = &self.authorization_header { + req = req.header(reqwest::header::AUTHORIZATION, value); + } req.send().await.map_err(Error::ReceiveBody) } diff --git a/pageserver/pagebench/Cargo.toml b/pageserver/pagebench/Cargo.toml index 245d293e4f..5b5ed09a2b 100644 --- a/pageserver/pagebench/Cargo.toml +++ b/pageserver/pagebench/Cargo.toml @@ -15,6 +15,7 @@ hdrhistogram.workspace = true humantime.workspace = true humantime-serde.workspace = true rand.workspace = true +reqwest.workspace=true serde.workspace = true serde_json.workspace = true tracing.workspace = true diff --git a/pageserver/pagebench/src/cmd/aux_files.rs b/pageserver/pagebench/src/cmd/aux_files.rs index bab17540f5..394a954c30 100644 --- a/pageserver/pagebench/src/cmd/aux_files.rs +++ b/pageserver/pagebench/src/cmd/aux_files.rs @@ -36,7 +36,8 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new( args.mgmt_api_endpoint.clone(), args.pageserver_jwt.as_deref(), - )); + None, // TODO: support ssl_ca_file for https APIs in pagebench. 
+ )?); // discover targets let timelines: Vec = crate::util::cli::targets::discover( diff --git a/pageserver/pagebench/src/cmd/basebackup.rs b/pageserver/pagebench/src/cmd/basebackup.rs index 51d7d5df89..d3013ded70 100644 --- a/pageserver/pagebench/src/cmd/basebackup.rs +++ b/pageserver/pagebench/src/cmd/basebackup.rs @@ -77,7 +77,8 @@ async fn main_impl( let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new( args.mgmt_api_endpoint.clone(), args.pageserver_jwt.as_deref(), - )); + None, // TODO: support ssl_ca_file for https APIs in pagebench. + )?); // discover targets let timelines: Vec = crate::util::cli::targets::discover( diff --git a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs index 617676c079..969cf24b93 100644 --- a/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs +++ b/pageserver/pagebench/src/cmd/getpage_latest_lsn.rs @@ -125,7 +125,8 @@ async fn main_impl( let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new( args.mgmt_api_endpoint.clone(), args.pageserver_jwt.as_deref(), - )); + None, // TODO: support ssl_ca_file for https APIs in pagebench. + )?); if let Some(engine_str) = &args.set_io_engine { mgmt_api_client.put_io_engine(engine_str).await?; diff --git a/pageserver/pagebench/src/cmd/ondemand_download_churn.rs b/pageserver/pagebench/src/cmd/ondemand_download_churn.rs index 3194e2e753..a77d3000cc 100644 --- a/pageserver/pagebench/src/cmd/ondemand_download_churn.rs +++ b/pageserver/pagebench/src/cmd/ondemand_download_churn.rs @@ -83,7 +83,8 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new( args.mgmt_api_endpoint.clone(), args.pageserver_jwt.as_deref(), - )); + None, // TODO: support ssl_ca_file for https APIs in pagebench. 
+ )?); if let Some(engine_str) = &args.set_io_engine { mgmt_api_client.put_io_engine(engine_str).await?; diff --git a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs index 16abbf9ffd..2f919ec652 100644 --- a/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs +++ b/pageserver/pagebench/src/cmd/trigger_initial_size_calculation.rs @@ -40,7 +40,8 @@ async fn main_impl(args: Args) -> anyhow::Result<()> { let mgmt_api_client = Arc::new(pageserver_client::mgmt_api::Client::new( args.mgmt_api_endpoint.clone(), args.pageserver_jwt.as_deref(), - )); + None, // TODO: support ssl_ca_file for https APIs in pagebench. + )?); // discover targets let timelines: Vec = crate::util::cli::targets::discover( diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 703629aed5..c4af0d5d41 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -25,11 +25,12 @@ use pageserver::task_mgr::{ }; use pageserver::tenant::{TenantSharedResources, mgr, secondary}; use pageserver::{ - CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, http, page_cache, page_service, - task_mgr, virtual_file, + CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, HttpsEndpointListener, http, + page_cache, page_service, task_mgr, virtual_file, }; use postgres_backend::AuthType; use remote_storage::GenericRemoteStorage; +use rustls_pki_types::{CertificateDer, PrivateKeyDer}; use tokio::signal::unix::SignalKind; use tokio::time::Instant; use tokio_util::sync::CancellationToken; @@ -343,8 +344,15 @@ fn start_pageserver( info!("Starting pageserver http handler on {http_addr}"); let http_listener = tcp_listener::bind(http_addr)?; - let pg_addr = &conf.listen_pg_addr; + let https_listener = match conf.listen_https_addr.as_ref() { + Some(https_addr) => { + info!("Starting pageserver https handler on {https_addr}"); + 
Some(tcp_listener::bind(https_addr)?) + } + None => None, + }; + let pg_addr = &conf.listen_pg_addr; info!("Starting pageserver pg protocol handler on {pg_addr}"); let pageserver_listener = tcp_listener::bind(pg_addr)?; @@ -575,9 +583,8 @@ fn start_pageserver( // Start up the service to handle HTTP mgmt API request. We created the // listener earlier already. - let http_endpoint_listener = { + let (http_endpoint_listener, https_endpoint_listener) = { let _rt_guard = MGMT_REQUEST_RUNTIME.enter(); // for hyper - let cancel = CancellationToken::new(); let router_state = Arc::new( http::routes::State::new( @@ -592,22 +599,51 @@ fn start_pageserver( ) .context("Failed to initialize router state")?, ); + let router = http::make_router(router_state, launch_ts, http_auth.clone())? .build() .map_err(|err| anyhow!(err))?; - let service = http_utils::RouterService::new(router).unwrap(); - let server = hyper0::Server::from_tcp(http_listener)? - .serve(service) - .with_graceful_shutdown({ - let cancel = cancel.clone(); - async move { cancel.clone().cancelled().await } - }); - let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error( - "http endpoint listener", - server, - )); - HttpEndpointListener(CancellableTask { task, cancel }) + let service = + Arc::new(http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow!(err))?); + + let http_task = { + let server = + http_utils::server::Server::new(Arc::clone(&service), http_listener, None)?; + let cancel = CancellationToken::new(); + + let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error( + "http endpoint listener", + server.serve(cancel.clone()), + )); + HttpEndpointListener(CancellableTask { task, cancel }) + }; + + let https_task = match https_listener { + Some(https_listener) => { + let certs = load_certs(&conf.ssl_cert_file)?; + let key = load_private_key(&conf.ssl_key_file)?; + + let server_config = rustls::ServerConfig::builder() + .with_no_client_auth() + .with_single_cert(certs, 
key)?; + + let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config)); + + let server = + http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?; + let cancel = CancellationToken::new(); + + let task = MGMT_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error( + "https endpoint listener", + server.serve(cancel.clone()), + )); + Some(HttpsEndpointListener(CancellableTask { task, cancel })) + } + None => None, + }; + + (http_task, https_task) }; let consumption_metrics_tasks = { @@ -683,6 +719,7 @@ fn start_pageserver( shutdown_pageserver.cancel(); pageserver::shutdown_pageserver( http_endpoint_listener, + https_endpoint_listener, page_service, consumption_metrics_tasks, disk_usage_eviction_task, @@ -697,6 +734,25 @@ fn start_pageserver( }) } +fn load_certs(filename: &Utf8Path) -> std::io::Result>> { + let file = std::fs::File::open(filename)?; + let mut reader = std::io::BufReader::new(file); + + rustls_pemfile::certs(&mut reader).collect() +} + +fn load_private_key(filename: &Utf8Path) -> anyhow::Result> { + let file = std::fs::File::open(filename)?; + let mut reader = std::io::BufReader::new(file); + + let key = rustls_pemfile::private_key(&mut reader)?; + + key.ok_or(anyhow::anyhow!( + "no private key found in {}", + filename.as_str(), + )) +} + async fn create_remote_storage_client( conf: &'static PageServerConf, ) -> anyhow::Result { diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index 06be873160..562a16a14e 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -53,6 +53,11 @@ pub struct PageServerConf { pub listen_pg_addr: String, /// Example (default): 127.0.0.1:9898 pub listen_http_addr: String, + /// Example: 127.0.0.1:9899 + pub listen_https_addr: Option, + + pub ssl_key_file: Utf8PathBuf, + pub ssl_cert_file: Utf8PathBuf, /// Current availability zone. Used for traffic metrics. 
pub availability_zone: Option, @@ -317,6 +322,9 @@ impl PageServerConf { let pageserver_api::config::ConfigToml { listen_pg_addr, listen_http_addr, + listen_https_addr, + ssl_key_file, + ssl_cert_file, availability_zone, wait_lsn_timeout, wal_redo_timeout, @@ -375,6 +383,9 @@ impl PageServerConf { // ------------------------------------------------------------ listen_pg_addr, listen_http_addr, + listen_https_addr, + ssl_key_file, + ssl_cert_file, availability_zone, wait_lsn_timeout, wal_redo_timeout, diff --git a/pageserver/src/controller_upcall_client.rs b/pageserver/src/controller_upcall_client.rs index 6d5c727958..745d04cf62 100644 --- a/pageserver/src/controller_upcall_client.rs +++ b/pageserver/src/controller_upcall_client.rs @@ -181,7 +181,7 @@ impl ControlPlaneGenerationsApi for ControllerUpcallClient { listen_pg_port: m.postgres_port, listen_http_addr: m.http_host, listen_http_port: m.http_port, - listen_https_port: None, // TODO: Support https. + listen_https_port: m.https_port, availability_zone_id: az_id.expect("Checked above"), }) } diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index 02767055fb..8373d0bd87 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -64,6 +64,7 @@ pub struct CancellableTask { pub cancel: CancellationToken, } pub struct HttpEndpointListener(pub CancellableTask); +pub struct HttpsEndpointListener(pub CancellableTask); pub struct ConsumptionMetricsTasks(pub CancellableTask); pub struct DiskUsageEvictionTask(pub CancellableTask); impl CancellableTask { @@ -77,6 +78,7 @@ impl CancellableTask { #[allow(clippy::too_many_arguments)] pub async fn shutdown_pageserver( http_listener: HttpEndpointListener, + https_listener: Option, page_service: page_service::Listener, consumption_metrics_worker: ConsumptionMetricsTasks, disk_usage_eviction_task: Option, @@ -213,6 +215,15 @@ pub async fn shutdown_pageserver( ) .await; + if let Some(https_listener) = https_listener { + timed( + https_listener.0.shutdown(), + 
"shutdown https", + Duration::from_secs(1), + ) + .await; + } + // Shut down the HTTP endpoint last, so that you can still check the server's // status while it's shutting down. // FIXME: We should probably stop accepting commands like attach/detach earlier. diff --git a/storage_controller/src/heartbeater.rs b/storage_controller/src/heartbeater.rs index dab6799d3e..ee4c9ef9cd 100644 --- a/storage_controller/src/heartbeater.rs +++ b/storage_controller/src/heartbeater.rs @@ -178,6 +178,7 @@ impl HeartBeat for HeartbeaterTask let mut heartbeat_futs = FuturesUnordered::new(); for (node_id, node) in &*pageservers { heartbeat_futs.push({ + let ssl_ca_cert = self.ssl_ca_cert.clone(); let jwt_token = self.jwt_token.clone(); let cancel = self.cancel.clone(); @@ -193,6 +194,7 @@ impl HeartBeat for HeartbeaterTask .with_client_retries( |client| async move { client.get_utilization().await }, &jwt_token, + &ssl_ca_cert, 3, 3, Duration::from_secs(1), diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index 3e448d7013..b27804d820 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -657,7 +657,9 @@ async fn handle_tenant_timeline_passthrough( let client = mgmt_api::Client::new( node.base_url(), service.get_config().pageserver_jwt_token.as_deref(), - ); + service.get_config().ssl_ca_cert.clone(), + ) + .map_err(|e| ApiError::InternalServerError(anyhow::anyhow!(e)))?; let resp = client.get_raw(path).await.map_err(|e| // We return 503 here because if we can't successfully send a request to the pageserver, // either we aren't available or the pageserver is unavailable. 
diff --git a/storage_controller/src/node.rs b/storage_controller/src/node.rs index 735bae2123..40f3c7c58e 100644 --- a/storage_controller/src/node.rs +++ b/storage_controller/src/node.rs @@ -7,7 +7,7 @@ use pageserver_api::controller_api::{ }; use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api; -use reqwest::StatusCode; +use reqwest::{Certificate, StatusCode}; use serde::Serialize; use tokio_util::sync::CancellationToken; use utils::backoff; @@ -276,10 +276,12 @@ impl Node { /// This will return None to indicate cancellation. Cancellation may happen from /// the cancellation token passed in, or from Self's cancellation token (i.e. node /// going offline). + #[allow(clippy::too_many_arguments)] pub(crate) async fn with_client_retries( &self, mut op: O, jwt: &Option, + ssl_ca_cert: &Option, warn_threshold: u32, max_retries: u32, timeout: Duration, @@ -298,19 +300,26 @@ impl Node { | ApiError(StatusCode::REQUEST_TIMEOUT, _) => false, ApiError(_, _) => true, Cancelled => true, + CreateClient(_) => true, } } + // TODO: refactor PageserverClient and with_client_retries (#11113).
+ let mut http_client = reqwest::ClientBuilder::new().timeout(timeout); + if let Some(ssl_ca_cert) = ssl_ca_cert.as_ref() { + http_client = http_client.add_root_certificate(ssl_ca_cert.clone()) + } + + let http_client = match http_client.build() { + Ok(http_client) => http_client, + Err(err) => return Some(Err(mgmt_api::Error::CreateClient(err))), + }; + backoff::retry( || { - let http_client = reqwest::ClientBuilder::new() - .timeout(timeout) - .build() - .expect("Failed to construct HTTP client"); - let client = PageserverClient::from_client( self.get_id(), - http_client, + http_client.clone(), self.base_url(), jwt.as_deref(), ); diff --git a/storage_controller/src/pageserver_client.rs b/storage_controller/src/pageserver_client.rs index d6127c355a..7fd4f37e7e 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -8,7 +8,7 @@ use pageserver_api::models::{ use pageserver_api::shard::TenantShardId; use pageserver_client::BlockUnblock; use pageserver_client::mgmt_api::{Client, Result}; -use reqwest::StatusCode; +use reqwest::{Certificate, StatusCode}; use utils::id::{NodeId, TenantId, TimelineId}; /// Thin wrapper around [`pageserver_client::mgmt_api::Client`]. It allows the storage @@ -46,11 +46,16 @@ macro_rules! 
measured_request { } impl PageserverClient { - pub(crate) fn new(node_id: NodeId, mgmt_api_endpoint: String, jwt: Option<&str>) -> Self { - Self { - inner: Client::from_client(reqwest::Client::new(), mgmt_api_endpoint, jwt), + pub(crate) fn new( + node_id: NodeId, + mgmt_api_endpoint: String, + jwt: Option<&str>, + ssl_ca_cert: Option, + ) -> Result { + Ok(Self { + inner: Client::new(mgmt_api_endpoint, jwt, ssl_ca_cert)?, node_id_label: node_id.0.to_string(), - } + }) } pub(crate) fn from_client( diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index a327f6f50f..9f0b789f19 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -299,6 +299,7 @@ impl Reconciler { .await }, &self.service_config.pageserver_jwt_token, + &self.service_config.ssl_ca_cert, 1, 3, timeout, @@ -420,7 +421,8 @@ impl Reconciler { node.get_id(), node.base_url(), self.service_config.pageserver_jwt_token.as_deref(), - ); + self.service_config.ssl_ca_cert.clone(), + )?; client .wait_lsn( @@ -443,7 +445,8 @@ impl Reconciler { node.get_id(), node.base_url(), self.service_config.pageserver_jwt_token.as_deref(), - ); + self.service_config.ssl_ca_cert.clone(), + )?; let timelines = client.timeline_list(&tenant_shard_id).await?; Ok(timelines @@ -481,6 +484,7 @@ impl Reconciler { .await }, &self.service_config.pageserver_jwt_token, + &self.service_config.ssl_ca_cert, 1, 3, request_download_timeout * 2, @@ -775,6 +779,7 @@ impl Reconciler { .with_client_retries( |client| async move { client.get_location_config(tenant_shard_id).await }, &self.service_config.pageserver_jwt_token, + &self.service_config.ssl_ca_cert, 1, 1, Duration::from_secs(5), @@ -1123,6 +1128,7 @@ impl Reconciler { .with_client_retries( |client| async move { client.get_location_config(tenant_shard_id).await }, &self.service_config.pageserver_jwt_token, + &self.service_config.ssl_ca_cert, 1, 3, Duration::from_secs(5), diff --git 
a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 2a68711977..b79f223a24 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -262,6 +262,7 @@ fn passthrough_api_error(node: &Node, e: mgmt_api::Error) -> ApiError { ApiError::Conflict(format!("{node} {status}: {status} {msg}")) } mgmt_api::Error::Cancelled => ApiError::ShuttingDown, + mgmt_api::Error::CreateClient(e) => ApiError::InternalServerError(anyhow::anyhow!(e)), } } @@ -887,6 +888,7 @@ impl Service { .with_client_retries( |client| async move { client.list_location_config().await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 5, timeout, @@ -984,11 +986,20 @@ impl Service { break; } - let client = PageserverClient::new( + let client = match PageserverClient::new( node.get_id(), node.base_url(), self.config.pageserver_jwt_token.as_deref(), - ); + self.config.ssl_ca_cert.clone(), + ) { + Ok(client) => client, + Err(e) => { + tracing::error!( + "Failed to create client to detach unknown shard {tenant_shard_id} on pageserver {node_id}: {e}" + ); + continue; + } + }; match client .location_config( tenant_shard_id, @@ -1015,7 +1026,7 @@ impl Service { // Non-fatal error: leaving a tenant shard behind that we are not managing shouldn't // break anything. 
tracing::error!( - "Failed to detach unknkown shard {tenant_shard_id} on pageserver {node_id}: {e}" + "Failed to detach unknown shard {tenant_shard_id} on pageserver {node_id}: {e}" ); } } @@ -1924,6 +1935,7 @@ impl Service { .with_client_retries( |client| async move { client.list_location_config().await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 5, SHORT_RECONCILE_TIMEOUT, @@ -1982,6 +1994,7 @@ impl Service { .await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 5, SHORT_RECONCILE_TIMEOUT, @@ -3125,7 +3138,9 @@ impl Service { node.get_id(), node.base_url(), self.config.pageserver_jwt_token.as_deref(), - ); + self.config.ssl_ca_cert.clone(), + ) + .map_err(|e| passthrough_api_error(&node, e))?; tracing::info!("Doing time travel recovery for shard {tenant_shard_id}",); @@ -3186,7 +3201,9 @@ impl Service { node.get_id(), node.base_url(), self.config.pageserver_jwt_token.as_deref(), - ); + self.config.ssl_ca_cert.clone(), + ) + .map_err(|e| passthrough_api_error(&node, e))?; futs.push(async move { let result = client .tenant_secondary_download(tenant_shard_id, wait) @@ -3309,6 +3326,7 @@ impl Service { .await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 3, RECONCILE_TIMEOUT, @@ -3464,6 +3482,7 @@ impl Service { tenant_shard_id: TenantShardId, locations: ShardMutationLocations, jwt: Option, + ssl_ca_cert: Option, create_req: TimelineCreateRequest, ) -> Result { let latest = locations.latest.node; @@ -3476,7 +3495,8 @@ impl Service { ); let client = - PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref()); + PageserverClient::new(latest.get_id(), latest.base_url(), jwt.as_deref(), ssl_ca_cert.clone()) + .map_err(|e| passthrough_api_error(&latest, e))?; let timeline_info = client .timeline_create(tenant_shard_id, &create_req) @@ -3499,7 +3519,9 @@ impl Service { location.node.get_id(), location.node.base_url(), jwt.as_deref(), - ); + ssl_ca_cert.clone(), + ) + .map_err(|e| 
passthrough_api_error(&location.node, e))?; let res = client .timeline_create(tenant_shard_id, &create_req) @@ -3528,6 +3550,7 @@ impl Service { shard_zero_tid, shard_zero_locations, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), create_req.clone(), ) .await?; @@ -3557,6 +3580,7 @@ impl Service { tenant_shard_id, mutation_locations, jwt.clone(), + self.config.ssl_ca_cert.clone(), create_req, )) }, @@ -3598,13 +3622,15 @@ impl Service { timeline_id: TimelineId, node: Node, jwt: Option, + ssl_ca_cert: Option, req: TimelineArchivalConfigRequest, ) -> Result<(), ApiError> { tracing::info!( "Setting archival config of timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}", ); - let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref()); + let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert) + .map_err(|e| passthrough_api_error(&node, e))?; client .timeline_archival_config(tenant_shard_id, timeline_id, &req) @@ -3627,6 +3653,7 @@ impl Service { timeline_id, node, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), req.clone(), )) }) @@ -3663,12 +3690,14 @@ impl Service { timeline_id: TimelineId, node: Node, jwt: Option, + ssl_ca_cert: Option, ) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> { tracing::info!( "Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}", ); - let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref()); + let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert) + .map_err(|e| passthrough_api_error(&node, e))?; client .timeline_detach_ancestor(tenant_shard_id, timeline_id) @@ -3708,6 +3737,7 @@ impl Service { timeline_id, node, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), )) }) .await?; @@ -3760,9 +3790,16 @@ impl Service { timeline_id: TimelineId, 
node: Node, jwt: Option, + ssl_ca_cert: Option, dir: BlockUnblock, ) -> Result<(), ApiError> { - let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref()); + let client = PageserverClient::new( + node.get_id(), + node.base_url(), + jwt.as_deref(), + ssl_ca_cert, + ) + .map_err(|e| passthrough_api_error(&node, e))?; client .timeline_block_unblock_gc(tenant_shard_id, timeline_id, dir) @@ -3782,6 +3819,7 @@ impl Service { timeline_id, node, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), dir, )) }) @@ -3903,6 +3941,7 @@ impl Service { node.with_client_retries( |client| op(tenant_shard_id, client), &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, warn_threshold, max_retries, timeout, @@ -4126,12 +4165,14 @@ impl Service { timeline_id: TimelineId, node: Node, jwt: Option, + ssl_ca_cert: Option, ) -> Result { tracing::info!( "Deleting timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}", ); - let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref()); + let client = PageserverClient::new(node.get_id(), node.base_url(), jwt.as_deref(), ssl_ca_cert) + .map_err(|e| passthrough_api_error(&node, e))?; let res = client .timeline_delete(tenant_shard_id, timeline_id) .await; @@ -4158,6 +4199,7 @@ impl Service { timeline_id, node, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), )) }) .await?; @@ -4180,6 +4222,7 @@ impl Service { timeline_id, shard_zero_locations.latest.node, self.config.pageserver_jwt_token.clone(), + self.config.ssl_ca_cert.clone(), ) .await?; Ok(shard_zero_status) @@ -4611,6 +4654,7 @@ impl Service { client.location_config(child_id, config, None, false).await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 10, Duration::from_secs(5), @@ -5214,7 +5258,9 @@ impl Service { node.get_id(), node.base_url(), self.config.pageserver_jwt_token.as_deref(), - ); + self.config.ssl_ca_cert.clone(), + ) + 
.map_err(|e| passthrough_api_error(node, e))?; let response = client .tenant_shard_split( *parent_id, @@ -5698,7 +5744,9 @@ impl Service { node.get_id(), node.base_url(), self.config.pageserver_jwt_token.as_deref(), - ); + self.config.ssl_ca_cert.clone(), + ) + .map_err(|e| passthrough_api_error(&node, e))?; let scan_result = client .tenant_scan_remote_storage(tenant_id) @@ -7340,6 +7388,7 @@ impl Service { .with_client_retries( |client| async move { client.tenant_heatmap_upload(tenant_shard_id).await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 3, 10, SHORT_RECONCILE_TIMEOUT, @@ -7376,6 +7425,7 @@ impl Service { .await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 3, 10, SHORT_RECONCILE_TIMEOUT, @@ -7503,6 +7553,7 @@ impl Service { node.with_client_retries( |client| async move { client.top_tenant_shards(request.clone()).await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 3, 3, Duration::from_secs(5), @@ -7622,6 +7673,7 @@ impl Service { .with_client_retries( |client| async move { client.tenant_secondary_status(tenant_shard_id).await }, &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, 1, 3, Duration::from_millis(250), diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 4d2b3587e8..7bc746d668 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -463,6 +463,10 @@ class NeonEnvBuilder: self.control_plane_compute_hook_api: str | None = None self.storage_controller_config: dict[Any, Any] | None = None + # Flag to enable https listener in pageserver, generate local ssl certs, + # and force storage controller to use https for pageserver api. 
+ self.use_https_pageserver_api: bool = False + self.pageserver_virtual_file_io_engine: str | None = pageserver_virtual_file_io_engine self.pageserver_get_vectored_concurrent_io: str | None = ( pageserver_get_vectored_concurrent_io @@ -1059,6 +1063,11 @@ class NeonEnv: self.initial_tenant = config.initial_tenant self.initial_timeline = config.initial_timeline + self.generate_local_ssl_certs = config.use_https_pageserver_api + self.ssl_ca_file = ( + self.repo_dir.joinpath("rootCA.crt") if self.generate_local_ssl_certs else None + ) + neon_local_env_vars = {} if self.rust_log_override is not None: neon_local_env_vars["RUST_LOG"] = self.rust_log_override @@ -1122,6 +1131,7 @@ class NeonEnv: }, "safekeepers": [], "pageservers": [], + "generate_local_ssl_certs": self.generate_local_ssl_certs, } if self.control_plane_api is not None: @@ -1130,8 +1140,14 @@ class NeonEnv: if self.control_plane_compute_hook_api is not None: cfg["control_plane_compute_hook_api"] = self.control_plane_compute_hook_api - if self.storage_controller_config is not None: - cfg["storage_controller"] = self.storage_controller_config + storage_controller_config = self.storage_controller_config + + if config.use_https_pageserver_api: + storage_controller_config = storage_controller_config or {} + storage_controller_config["use_https_pageserver_api"] = True + + if storage_controller_config is not None: + cfg["storage_controller"] = storage_controller_config # Create config for pageserver http_auth_type = "NeonJWT" if config.auth_enabled else "Trust" @@ -1142,6 +1158,7 @@ class NeonEnv: pageserver_port = PageserverPort( pg=self.port_distributor.get_port(), http=self.port_distributor.get_port(), + https=self.port_distributor.get_port() if config.use_https_pageserver_api else None, ) # Availabilty zones may also be configured manually with `NeonEnvBuilder.pageserver_config_override` @@ -1156,6 +1173,9 @@ class NeonEnv: "id": ps_id, "listen_pg_addr": f"localhost:{pageserver_port.pg}", "listen_http_addr": 
f"localhost:{pageserver_port.http}", + "listen_https_addr": f"localhost:{pageserver_port.https}" + if config.use_https_pageserver_api + else None, "pg_auth_type": pg_auth_type, "http_auth_type": http_auth_type, "availability_zone": availability_zone, diff --git a/test_runner/regress/test_ssl.py b/test_runner/regress/test_ssl.py new file mode 100644 index 0000000000..25d839aa42 --- /dev/null +++ b/test_runner/regress/test_ssl.py @@ -0,0 +1,15 @@ +import requests +from fixtures.neon_fixtures import NeonEnvBuilder + + +def test_pageserver_https_api(neon_env_builder: NeonEnvBuilder): + """ + Test HTTPS pageserver management API. + If NeonEnv starts with use_https_pageserver_api with no errors, it's already a success. + Make /v1/status request to HTTPS API to ensure it's appropriately configured. + """ + neon_env_builder.use_https_pageserver_api = True + env = neon_env_builder.init_start() + + addr = f"https://localhost:{env.pageserver.service_port.https}/v1/status" + requests.get(addr, verify=str(env.ssl_ca_file)).raise_for_status() From 8c553297cb316bb2c39a799074cffff6db06d8f4 Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Mon, 10 Mar 2025 16:33:28 +0000 Subject: [PATCH 135/207] safekeeper: use max end lsn as start of next batch (#11152) ## Problem Partial reads are still problematic. They are stored in the buffer of the wal decoder and result in gaps being reported too eagerly on the pageserver side. ## Summary of changes Previously, we always used the start LSN of the chunk of WAL that was just read. This patch switches to using the end LSN of the last record that was decoded in the previous iteration. 
--- libs/postgres_ffi/src/lib.rs | 8 ++++++++ safekeeper/src/send_interpreted_wal.rs | 8 +++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/libs/postgres_ffi/src/lib.rs b/libs/postgres_ffi/src/lib.rs index 8dfd8d8750..05d8de4c7a 100644 --- a/libs/postgres_ffi/src/lib.rs +++ b/libs/postgres_ffi/src/lib.rs @@ -396,6 +396,14 @@ pub mod waldecoder { self.lsn + self.inputbuf.remaining() as u64 } + /// Returns the LSN up to which the WAL decoder has processed. + /// + /// If [`Self::poll_decode`] returned a record, then this will return + /// the end LSN of said record. + pub fn lsn(&self) -> Lsn { + self.lsn + } + pub fn feed_bytes(&mut self, buf: &[u8]) { self.inputbuf.extend_from_slice(buf); } diff --git a/safekeeper/src/send_interpreted_wal.rs b/safekeeper/src/send_interpreted_wal.rs index c71f23a010..2b1fd7b854 100644 --- a/safekeeper/src/send_interpreted_wal.rs +++ b/safekeeper/src/send_interpreted_wal.rs @@ -219,12 +219,12 @@ impl InterpretedWalReaderState { } } - fn take_current_batch_wal_start(&mut self) -> Lsn { + fn replace_current_batch_wal_start(&mut self, with: Lsn) -> Lsn { match self { InterpretedWalReaderState::Running { current_batch_wal_start, .. - } => current_batch_wal_start.take().unwrap(), + } => current_batch_wal_start.replace(with).unwrap(), InterpretedWalReaderState::Done => { panic!("take_current_batch_wal_start called on finished reader") } @@ -416,10 +416,12 @@ impl InterpretedWalReader { let shard_ids = self.shard_senders.keys().copied().collect::>(); let mut records_by_sender: HashMap> = HashMap::new(); let mut max_next_record_lsn = None; + let mut max_end_record_lsn = None; while let Some((next_record_lsn, recdata)) = wal_decoder.poll_decode()? 
{ assert!(next_record_lsn.is_aligned()); max_next_record_lsn = Some(next_record_lsn); + max_end_record_lsn = Some(wal_decoder.lsn()); let interpreted = InterpretedWalRecord::from_bytes_filtered( recdata, @@ -470,7 +472,7 @@ impl InterpretedWalReader { let batch_wal_start_lsn = { let mut guard = self.state.write().unwrap(); guard.update_current_position(max_next_record_lsn); - guard.take_current_batch_wal_start() + guard.replace_current_batch_wal_start(max_end_record_lsn.unwrap()) }; // Send interpreted records downstream. Anything that has already been seen From bc052fd0fcfd7552f17ffd59e497f89a503bac28 Mon Sep 17 00:00:00 2001 From: Matthias van de Meent Date: Mon, 10 Mar 2025 18:02:30 +0100 Subject: [PATCH 136/207] Add configuration options to disable prevlink checks (#11138) This allows for improved decoding of otherwise broken WAL. ## Problem Currently, if (or when) a WAL record has a wrong prevptr, that breaks decoding. With this, we don't have to break on that if we decide it's OK to proceed after that. ## Summary of changes Use a Neon GUC to allow the system to enable the NEON-specific skip_lsn_checks option in XLogReader. 
--- pgxn/neon/neon.c | 9 +++++++++ pgxn/neon/neon.h | 1 + pgxn/neon/walsender_hooks.c | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/pgxn/neon/neon.c b/pgxn/neon/neon.c index 4b448ba5f6..0f226cc9e2 100644 --- a/pgxn/neon/neon.c +++ b/pgxn/neon/neon.c @@ -457,6 +457,15 @@ _PG_init(void) PGC_SIGHUP, 0, NULL, NULL, NULL); + DefineCustomBoolVariable( + "neon.disable_wal_prevlink_checks", + "Disable validation of prev link in WAL records", + NULL, + &disable_wal_prev_lsn_checks, + false, + PGC_SIGHUP, + 0, + NULL, NULL, NULL); DefineCustomBoolVariable( "neon.allow_replica_misconfig", diff --git a/pgxn/neon/neon.h b/pgxn/neon/neon.h index 7686ce076b..c9beb8c318 100644 --- a/pgxn/neon/neon.h +++ b/pgxn/neon/neon.h @@ -23,6 +23,7 @@ extern char *wal_acceptors_list; extern int wal_acceptor_reconnect_timeout; extern int wal_acceptor_connection_timeout; extern int readahead_getpage_pull_timeout_ms; +extern bool disable_wal_prev_lsn_checks; #if PG_MAJORVERSION_NUM >= 17 extern uint32 WAIT_EVENT_NEON_LFC_MAINTENANCE; diff --git a/pgxn/neon/walsender_hooks.c b/pgxn/neon/walsender_hooks.c index a0fe3822cc..81198d6c8d 100644 --- a/pgxn/neon/walsender_hooks.c +++ b/pgxn/neon/walsender_hooks.c @@ -32,6 +32,8 @@ extern XLogRecPtr WalSndWaitForWal(XLogRecPtr loc); extern bool GetDonorShmem(XLogRecPtr *donor_lsn); extern XLogRecPtr GetXLogReplayRecPtr(TimeLineID *replayTLI); +bool disable_wal_prev_lsn_checks = false; + static XLogRecPtr NeonWALReadWaitForWAL(XLogRecPtr loc) { @@ -82,6 +84,8 @@ NeonWALPageRead( if (flushptr < targetPagePtr + reqLen) return -1; + xlogreader->skip_lsn_checks = disable_wal_prev_lsn_checks; + /* Read at most XLOG_BLCKSZ bytes */ if (targetPagePtr + XLOG_BLCKSZ <= flushptr) count = XLOG_BLCKSZ; From fb1957936ccfd4299f1e7912f6024470249721c2 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 10 Mar 2025 20:28:55 +0200 Subject: [PATCH 137/207] Fix calculation of LFC used_pages (#11095) ## Problem Async prefetch in LFC PR causes incorrect
calculation of LFC `used_pages` when a page is overwritten ## Summary of changes Decrement `used_pages` if a page is overwritten. --------- Co-authored-by: Konstantin Knizhnik Co-authored-by: Matthias van de Meent --- pgxn/neon/file_cache.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 9f0a877b07..f13522e55b 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -1195,9 +1195,11 @@ lfc_writev(NRelFileInfo rinfo, ForkNumber forkNum, BlockNumber blkno, state = GET_STATE(entry, chunk_offs + i); if (state == PENDING) { SET_STATE(entry, chunk_offs + i, REQUESTED); - } else if (state != REQUESTED) { + } else if (state == UNAVAILABLE) { SET_STATE(entry, chunk_offs + i, PENDING); break; + } else if (state == AVAILABLE) { + break; } if (!sleeping) { From 3451bdd3d219c04a28a3f0499924827a816ca0c8 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Mon, 10 Mar 2025 16:03:49 -0400 Subject: [PATCH 138/207] fix(test): force L0 compaction before gc-compaction (#11143) ## Problem Fix test flakiness of `test_gc_feedback` Closes: https://github.com/neondatabase/neon/issues/11153 ## Summary of changes Looking at the log, gc-compaction is interrupted by L0 compaction. Signed-off-by: Alex Chi Z --- test_runner/performance/test_gc_feedback.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_runner/performance/test_gc_feedback.py b/test_runner/performance/test_gc_feedback.py index acb7b56fd0..7c9e9f47c8 100644 --- a/test_runner/performance/test_gc_feedback.py +++ b/test_runner/performance/test_gc_feedback.py @@ -69,6 +69,9 @@ def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma env.create_branch("child") branch_created += 1 + # Ensure L0 layers are compacted so that gc-compaction doesn't get preempted.
+ client.timeline_checkpoint(tenant_id, timeline_id, force_l0_compaction=True) + max_num_of_deltas_above_image = 0 max_total_num_of_deltas = 0 for key_range in client.perf_info(tenant_id, timeline_id): From 4d3c4776894ed0838c4dbaaf4c96795caa8831ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 11 Mar 2025 03:31:22 +0100 Subject: [PATCH 139/207] storcon: timeline table, creation and deletion (#11058) This PR extends the storcon with basic safekeeper management of timelines, mainly timeline creation and deletion. We want to make the storcon manage safekeepers in the future. Timeline creation is controlled by the `--timelines-onto-safekeepers` flag. 1. it adds the `timelines` and `safekeeper_timeline_pending_ops` tables to the storcon db 2. it extends the code for timeline creation and deletion 3. it adds per-safekeeper reconciler tasks TODO: * maybe not immediately schedule reconciliations for deletions but have a prior manual step * tenant deletions * add exclude API definitions (probably separate PR) * how to choose safekeeper to do exclude on vs deletion? this can be a bit hairy because the safekeeper might go offline in the meantime. * error/failure case handling * tests (cc test_explicit_timeline_creation from #11002) * single safekeeper mode: we often only have one SK (in tests for example) * `notify-safekeepers` hook: https://github.com/neondatabase/neon/issues/11163 TODOs implemented: * cancellations of enqueued reconciliations on a per-timeline basis, helpful if there is an ongoing deletion * implement pending ops overwrite behavior * load pending operations from db RFC section for important reading: [link](https://github.com/neondatabase/neon/blob/main/docs/rfcs/035-safekeeper-dynamic-membership-change.md#storage_controller-implementation) Implements the bulk of #9011 Successor of #10440.
--------- Co-authored-by: Arseny Sher --- Cargo.lock | 1 + control_plane/src/local_env.rs | 3 + control_plane/src/storage_controller.rs | 4 + libs/pageserver_api/src/models.rs | 25 + libs/safekeeper_api/src/membership.rs | 8 + libs/safekeeper_api/src/models.rs | 4 +- storage_controller/Cargo.toml | 1 + .../down.sql | 2 + .../up.sql | 19 + storage_controller/src/main.rs | 6 + storage_controller/src/persistence.rs | 307 ++++++++++- storage_controller/src/safekeeper.rs | 4 + storage_controller/src/safekeeper_client.rs | 3 - storage_controller/src/schema.rs | 34 ++ storage_controller/src/service.rs | 513 +++++++++++++++++- .../src/service/safekeeper_reconciler.rs | 340 ++++++++++++ 16 files changed, 1248 insertions(+), 26 deletions(-) create mode 100644 storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/down.sql create mode 100644 storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/up.sql create mode 100644 storage_controller/src/service/safekeeper_reconciler.rs diff --git a/Cargo.lock b/Cargo.lock index fa40009769..778ff19fec 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6621,6 +6621,7 @@ dependencies = [ "bytes", "chrono", "clap", + "clashmap", "control_plane", "cron", "diesel", diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 2e8fb8f07b..ec9eb74e6f 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -177,6 +177,8 @@ pub struct NeonStorageControllerConf { #[serde(default)] pub use_https_pageserver_api: bool, + + pub timelines_onto_safekeepers: bool, } impl NeonStorageControllerConf { @@ -201,6 +203,7 @@ impl Default for NeonStorageControllerConf { heartbeat_interval: Self::DEFAULT_HEARTBEAT_INTERVAL, long_reconcile_threshold: None, use_https_pageserver_api: false, + timelines_onto_safekeepers: false, } } } diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 1df50e211c..439d7936a7 100644 --- 
a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -584,6 +584,10 @@ impl StorageController { self.env.base_data_dir.display() )); + if self.config.timelines_onto_safekeepers { + args.push("--timelines-onto-safekeepers".to_string()); + } + background_process::start_process( COMMAND, &instance_dir, diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 749a8acc4e..13a9b5d89e 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -274,6 +274,31 @@ pub struct TimelineCreateRequest { pub mode: TimelineCreateRequestMode, } +/// Storage controller specific extensions to [`TimelineInfo`]. +#[derive(Serialize, Deserialize, Clone)] +pub struct TimelineCreateResponseStorcon { + #[serde(flatten)] + pub timeline_info: TimelineInfo, + + pub safekeepers: Option, +} + +/// Safekeepers as returned in timeline creation request to storcon or pushed to +/// cplane in the post migration hook. +#[derive(Serialize, Deserialize, Clone)] +pub struct SafekeepersInfo { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub generation: u32, + pub safekeepers: Vec, +} + +#[derive(Serialize, Deserialize, Clone)] +pub struct SafekeeperInfo { + pub id: NodeId, + pub hostname: String, +} + #[derive(Serialize, Deserialize, Clone)] #[serde(untagged)] pub enum TimelineCreateRequestMode { diff --git a/libs/safekeeper_api/src/membership.rs b/libs/safekeeper_api/src/membership.rs index bb8934744a..3d4d17096e 100644 --- a/libs/safekeeper_api/src/membership.rs +++ b/libs/safekeeper_api/src/membership.rs @@ -131,6 +131,14 @@ impl Configuration { } } + pub fn new(members: MemberSet) -> Self { + Configuration { + generation: INITIAL_GENERATION, + members, + new_members: None, + } + } + /// Is `sk_id` member of the configuration? 
pub fn contains(&self, sk_id: NodeId) -> bool { self.members.contains(sk_id) || self.new_members.as_ref().is_some_and(|m| m.contains(sk_id)) diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index 2f2aeaa429..10c703395f 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -18,7 +18,7 @@ pub struct SafekeeperStatus { pub id: NodeId, } -#[derive(Serialize, Deserialize)] +#[derive(Serialize, Deserialize, Clone)] pub struct TimelineCreateRequest { pub tenant_id: TenantId, pub timeline_id: TimelineId, @@ -283,7 +283,7 @@ pub struct SafekeeperUtilization { } /// pull_timeline request body. -#[derive(Debug, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct PullTimelineRequest { pub tenant_id: TenantId, pub timeline_id: TimelineId, diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index 6b657b5ea0..8211bdce62 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -19,6 +19,7 @@ bytes.workspace = true chrono.workspace = true clap.workspace = true cron.workspace = true +clashmap.workspace = true fail.workspace = true futures.workspace = true governor.workspace = true diff --git a/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/down.sql b/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/down.sql new file mode 100644 index 0000000000..8f75e8947e --- /dev/null +++ b/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/down.sql @@ -0,0 +1,2 @@ +DROP TABLE timelines; +DROP TABLE safekeeper_timeline_pending_ops; diff --git a/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/up.sql b/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/up.sql new file mode 100644 index 0000000000..82003ab292 --- /dev/null +++ b/storage_controller/migrations/2025-02-14-160526_safekeeper_timelines/up.sql @@ -0,0 +1,19 @@ +CREATE TABLE timelines ( + 
tenant_id VARCHAR NOT NULL, + timeline_id VARCHAR NOT NULL, + start_lsn pg_lsn NOT NULL, + generation INTEGER NOT NULL, + sk_set BIGINT[] NOT NULL, + new_sk_set BIGINT[], + cplane_notified_generation INTEGER NOT NULL, + deleted_at timestamptz, + PRIMARY KEY(tenant_id, timeline_id) +); +CREATE TABLE safekeeper_timeline_pending_ops ( + sk_id BIGINT NOT NULL, + tenant_id VARCHAR NOT NULL, + timeline_id VARCHAR NOT NULL, + generation INTEGER NOT NULL, + op_kind VARCHAR NOT NULL, + PRIMARY KEY(tenant_id, timeline_id, sk_id) +); diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 1d49cd85ca..46ac1cd7ca 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -144,6 +144,11 @@ struct Cli { /// Flag to use https for requests to pageserver API. #[arg(long, default_value = "false")] use_https_pageserver_api: bool, + + // Whether to put timelines onto safekeepers + #[arg(long, default_value = "false")] + timelines_onto_safekeepers: bool, + /// Flag to use https for requests to safekeeper API. 
#[arg(long, default_value = "false")] use_https_safekeeper_api: bool, @@ -370,6 +375,7 @@ async fn async_main() -> anyhow::Result<()> { use_https_pageserver_api: args.use_https_pageserver_api, use_https_safekeeper_api: args.use_https_safekeeper_api, ssl_ca_cert, + timelines_onto_safekeepers: args.timelines_onto_safekeepers, }; // Validate that we can connect to the database diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 939b8c6cd8..5146fe472e 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -1,12 +1,15 @@ pub(crate) mod split_state; use std::collections::HashMap; +use std::io::Write; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; use diesel::deserialize::{FromSql, FromSqlRow}; +use diesel::expression::AsExpression; use diesel::pg::Pg; use diesel::prelude::*; +use diesel::serialize::{IsNull, ToSql}; use diesel_async::async_connection_wrapper::AsyncConnectionWrapper; use diesel_async::pooled_connection::bb8::Pool; use diesel_async::pooled_connection::{AsyncDieselConnectionManager, ManagerConfig}; @@ -29,7 +32,8 @@ use rustls::crypto::ring; use scoped_futures::ScopedBoxFuture; use serde::{Deserialize, Serialize}; use utils::generation::Generation; -use utils::id::{NodeId, TenantId}; +use utils::id::{NodeId, TenantId, TimelineId}; +use utils::lsn::Lsn; use self::split_state::SplitState; use crate::metrics::{ @@ -117,6 +121,11 @@ pub(crate) enum DatabaseOperation { GetLeader, UpdateLeader, SetPreferredAzs, + InsertTimeline, + GetTimeline, + InsertTimelineReconcile, + RemoveTimelineReconcile, + ListTimelineReconcile, } #[must_use] @@ -1276,6 +1285,166 @@ impl Persistence { }) .await } + + /// Persist timeline. Returns if the timeline was newly inserted. If it wasn't, we haven't done any writes. 
+ pub(crate) async fn insert_timeline(&self, entry: TimelinePersistence) -> DatabaseResult { + use crate::schema::timelines; + + let entry = &entry; + self.with_measured_conn(DatabaseOperation::InsertTimeline, move |conn| { + Box::pin(async move { + let inserted_updated = diesel::insert_into(timelines::table) + .values(entry) + .on_conflict((timelines::tenant_id, timelines::timeline_id)) + .do_nothing() + .execute(conn) + .await?; + + match inserted_updated { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(DatabaseError::Logical(format!( + "unexpected number of rows ({})", + inserted_updated + ))), + } + }) + }) + .await + } + + /// Load timeline from db. Returns `None` if not present. + pub(crate) async fn get_timeline( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + ) -> DatabaseResult> { + use crate::schema::timelines::dsl; + + let tenant_id = &tenant_id; + let timeline_id = &timeline_id; + let timeline_from_db = self + .with_measured_conn(DatabaseOperation::GetTimeline, move |conn| { + Box::pin(async move { + let mut from_db: Vec = dsl::timelines + .filter( + dsl::tenant_id + .eq(&tenant_id.to_string()) + .and(dsl::timeline_id.eq(&timeline_id.to_string())), + ) + .load(conn) + .await?; + if from_db.is_empty() { + return Ok(None); + } + if from_db.len() != 1 { + return Err(DatabaseError::Logical(format!( + "unexpected number of rows ({})", + from_db.len() + ))); + } + + Ok(Some(from_db.pop().unwrap().into_persistence())) + }) + }) + .await?; + + Ok(timeline_from_db) + } + /// Persist pending op. Returns if it was newly inserted. If it wasn't, we haven't done any writes. 
+ pub(crate) async fn insert_pending_op( + &self, + entry: TimelinePendingOpPersistence, + ) -> DatabaseResult { + use crate::schema::safekeeper_timeline_pending_ops as skpo; + // This overrides the `filter` fn used in other functions, so contain the mayhem via a function-local use + use diesel::query_dsl::methods::FilterDsl; + + let entry = &entry; + self.with_measured_conn(DatabaseOperation::InsertTimelineReconcile, move |conn| { + Box::pin(async move { + // For simplicity it makes sense to keep only the last operation + // per (tenant, timeline, sk) tuple: if we migrated a timeline + // from node and adding it back it is not necessary to remove + // data on it. Hence, generation is not part of primary key and + // we override any rows with lower generations here. + let inserted_updated = diesel::insert_into(skpo::table) + .values(entry) + .on_conflict((skpo::tenant_id, skpo::timeline_id, skpo::sk_id)) + .do_update() + .set(entry) + .filter(skpo::generation.lt(entry.generation)) + .execute(conn) + .await?; + + match inserted_updated { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(DatabaseError::Logical(format!( + "unexpected number of rows ({})", + inserted_updated + ))), + } + }) + }) + .await + } + /// Remove persisted pending op. 
+ pub(crate) async fn remove_pending_op( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + sk_id: NodeId, + generation: u32, + ) -> DatabaseResult<()> { + use crate::schema::safekeeper_timeline_pending_ops::dsl; + + let tenant_id = &tenant_id; + let timeline_id = &timeline_id; + self.with_measured_conn(DatabaseOperation::RemoveTimelineReconcile, move |conn| { + Box::pin(async move { + diesel::delete(dsl::safekeeper_timeline_pending_ops) + .filter(dsl::tenant_id.eq(tenant_id.to_string())) + .filter(dsl::timeline_id.eq(timeline_id.to_string())) + .filter(dsl::sk_id.eq(sk_id.0 as i64)) + .filter(dsl::generation.eq(generation as i32)) + .execute(conn) + .await?; + Ok(()) + }) + }) + .await + } + + /// Load pending operations from db. + pub(crate) async fn list_pending_ops( + &self, + filter_for_sk: Option, + ) -> DatabaseResult> { + use crate::schema::safekeeper_timeline_pending_ops::dsl; + + const FILTER_VAL_1: i64 = 1; + const FILTER_VAL_2: i64 = 2; + let filter_opt = filter_for_sk.map(|id| id.0 as i64); + let timeline_from_db = self + .with_measured_conn(DatabaseOperation::ListTimelineReconcile, move |conn| { + Box::pin(async move { + let from_db: Vec = + dsl::safekeeper_timeline_pending_ops + .filter( + dsl::sk_id + .eq(filter_opt.unwrap_or(FILTER_VAL_1)) + .and(dsl::sk_id.eq(filter_opt.unwrap_or(FILTER_VAL_2))), + ) + .load(conn) + .await?; + Ok(from_db) + }) + }) + .await?; + + Ok(timeline_from_db) + } } pub(crate) fn load_certs() -> anyhow::Result> { @@ -1671,3 +1840,139 @@ struct InsertUpdateSafekeeper<'a> { availability_zone_id: &'a str, scheduling_policy: Option<&'a str>, } + +#[derive(Serialize, Deserialize, FromSqlRow, AsExpression, Eq, PartialEq, Debug, Copy, Clone)] +#[diesel(sql_type = crate::schema::sql_types::PgLsn)] +pub(crate) struct LsnWrapper(pub(crate) Lsn); + +impl From for LsnWrapper { + fn from(value: Lsn) -> Self { + LsnWrapper(value) + } +} + +impl FromSql for LsnWrapper { + fn from_sql( + bytes: ::RawValue<'_>, + ) -> 
diesel::deserialize::Result { + let byte_arr: diesel::deserialize::Result<[u8; 8]> = bytes + .as_bytes() + .try_into() + .map_err(|_| "Can't obtain lsn from sql".into()); + Ok(LsnWrapper(Lsn(u64::from_be_bytes(byte_arr?)))) + } +} + +impl ToSql for LsnWrapper { + fn to_sql<'b>( + &'b self, + out: &mut diesel::serialize::Output<'b, '_, Pg>, + ) -> diesel::serialize::Result { + out.write_all(&u64::to_be_bytes(self.0.0)) + .map(|_| IsNull::No) + .map_err(Into::into) + } +} + +#[derive(Insertable, AsChangeset, Queryable, Selectable, Clone)] +#[diesel(table_name = crate::schema::timelines)] +pub(crate) struct TimelinePersistence { + pub(crate) tenant_id: String, + pub(crate) timeline_id: String, + pub(crate) start_lsn: LsnWrapper, + pub(crate) generation: i32, + pub(crate) sk_set: Vec, + pub(crate) new_sk_set: Option>, + pub(crate) cplane_notified_generation: i32, + pub(crate) deleted_at: Option>, +} + +/// This is separate from [TimelinePersistence] only because postgres allows NULLs +/// in arrays and there is no way to forbid that at schema level. Hence diesel +/// wants `sk_set` to be `Vec>` instead of `Vec` for +/// Queryable/Selectable. It does however allow insertions without redundant +/// Option(s), so [TimelinePersistence] doesn't have them. +#[derive(Queryable, Selectable)] +#[diesel(table_name = crate::schema::timelines)] +pub(crate) struct TimelineFromDb { + pub(crate) tenant_id: String, + pub(crate) timeline_id: String, + pub(crate) start_lsn: LsnWrapper, + pub(crate) generation: i32, + pub(crate) sk_set: Vec>, + pub(crate) new_sk_set: Option>>, + pub(crate) cplane_notified_generation: i32, + pub(crate) deleted_at: Option>, +} + +impl TimelineFromDb { + fn into_persistence(self) -> TimelinePersistence { + // We should never encounter null entries in the sets, but we need to filter them out. + // There is no way to forbid this in the schema that diesel recognizes (to our knowledge). 
+ let sk_set = self.sk_set.into_iter().flatten().collect::>(); + let new_sk_set = self + .new_sk_set + .map(|s| s.into_iter().flatten().collect::>()); + TimelinePersistence { + tenant_id: self.tenant_id, + timeline_id: self.timeline_id, + start_lsn: self.start_lsn, + generation: self.generation, + sk_set, + new_sk_set, + cplane_notified_generation: self.cplane_notified_generation, + deleted_at: self.deleted_at, + } + } +} + +#[derive(Insertable, AsChangeset, Queryable, Selectable, Clone)] +#[diesel(table_name = crate::schema::safekeeper_timeline_pending_ops)] +pub(crate) struct TimelinePendingOpPersistence { + pub(crate) sk_id: i64, + pub(crate) tenant_id: String, + pub(crate) timeline_id: String, + pub(crate) generation: i32, + pub(crate) op_kind: SafekeeperTimelineOpKind, +} + +#[derive(Serialize, Deserialize, FromSqlRow, AsExpression, Eq, PartialEq, Debug, Copy, Clone)] +#[diesel(sql_type = diesel::sql_types::VarChar)] +pub(crate) enum SafekeeperTimelineOpKind { + Pull, + Exclude, + Delete, +} + +impl FromSql for SafekeeperTimelineOpKind { + fn from_sql( + bytes: ::RawValue<'_>, + ) -> diesel::deserialize::Result { + let bytes = bytes.as_bytes(); + match core::str::from_utf8(bytes) { + Ok(s) => match s { + "pull" => Ok(SafekeeperTimelineOpKind::Pull), + "exclude" => Ok(SafekeeperTimelineOpKind::Exclude), + "delete" => Ok(SafekeeperTimelineOpKind::Delete), + _ => Err(format!("can't parse: {s}").into()), + }, + Err(e) => Err(format!("invalid UTF-8 for op_kind: {e}").into()), + } + } +} + +impl ToSql for SafekeeperTimelineOpKind { + fn to_sql<'b>( + &'b self, + out: &mut diesel::serialize::Output<'b, '_, Pg>, + ) -> diesel::serialize::Result { + let kind_str = match self { + SafekeeperTimelineOpKind::Pull => "pull", + SafekeeperTimelineOpKind::Exclude => "exclude", + SafekeeperTimelineOpKind::Delete => "delete", + }; + out.write_all(kind_str.as_bytes()) + .map(|_| IsNull::No) + .map_err(Into::into) + } +} diff --git a/storage_controller/src/safekeeper.rs 
b/storage_controller/src/safekeeper.rs index 16f72ef4bc..2bd28f29af 100644 --- a/storage_controller/src/safekeeper.rs +++ b/storage_controller/src/safekeeper.rs @@ -21,6 +21,7 @@ pub struct Safekeeper { listen_https_port: Option, scheduling_policy: SkSchedulingPolicy, id: NodeId, + /// Heartbeating result. availability: SafekeeperState, // Flag from storcon's config to use https for safekeeper API. @@ -85,6 +86,9 @@ impl Safekeeper { self.scheduling_policy = scheduling_policy; self.skp.scheduling_policy = scheduling_policy.into(); } + pub(crate) fn availability(&self) -> SafekeeperState { + self.availability.clone() + } /// Perform an operation (which is given a [`SafekeeperClient`]) with retries #[allow(clippy::too_many_arguments)] pub(crate) async fn with_client_retries( diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs index 662f6d43be..1533b6c086 100644 --- a/storage_controller/src/safekeeper_client.rs +++ b/storage_controller/src/safekeeper_client.rs @@ -57,7 +57,6 @@ impl SafekeeperClient { } } - #[allow(dead_code)] pub(crate) async fn create_timeline( &self, req: &TimelineCreateRequest, @@ -70,7 +69,6 @@ impl SafekeeperClient { ) } - #[allow(dead_code)] pub(crate) async fn delete_timeline( &self, tenant_id: TenantId, @@ -84,7 +82,6 @@ impl SafekeeperClient { ) } - #[allow(dead_code)] pub(crate) async fn pull_timeline( &self, req: &PullTimelineRequest, diff --git a/storage_controller/src/schema.rs b/storage_controller/src/schema.rs index ebfe630173..9b36376fcb 100644 --- a/storage_controller/src/schema.rs +++ b/storage_controller/src/schema.rs @@ -1,5 +1,11 @@ // @generated automatically by Diesel CLI. +pub mod sql_types { + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] + #[diesel(postgres_type(name = "pg_lsn", schema = "pg_catalog"))] + pub struct PgLsn; +} + diesel::table! { controllers (address, started_at) { address -> Varchar, @@ -30,6 +36,16 @@ diesel::table! 
{ } } +diesel::table! { + safekeeper_timeline_pending_ops (tenant_id, timeline_id, sk_id) { + sk_id -> Int8, + tenant_id -> Varchar, + timeline_id -> Varchar, + generation -> Int4, + op_kind -> Varchar, + } +} + diesel::table! { safekeepers (id) { id -> Int8, @@ -60,10 +76,28 @@ diesel::table! { } } +diesel::table! { + use diesel::sql_types::*; + use super::sql_types::PgLsn; + + timelines (tenant_id, timeline_id) { + tenant_id -> Varchar, + timeline_id -> Varchar, + start_lsn -> PgLsn, + generation -> Int4, + sk_set -> Array>, + new_sk_set -> Nullable>>, + cplane_notified_generation -> Int4, + deleted_at -> Nullable, + } +} + diesel::allow_tables_to_appear_in_same_query!( controllers, metadata_health, nodes, + safekeeper_timeline_pending_ops, safekeepers, tenant_shards, + timelines, ); diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index b79f223a24..a06748abc6 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -1,5 +1,6 @@ pub mod chaos_injector; mod context_iterator; +pub(crate) mod safekeeper_reconciler; use std::borrow::Cow; use std::cmp::Ordering; @@ -34,11 +35,12 @@ use pageserver_api::controller_api::{ }; use pageserver_api::models::{ self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization, - SecondaryProgress, ShardParameters, TenantConfig, TenantConfigPatchRequest, - TenantConfigRequest, TenantLocationConfigRequest, TenantLocationConfigResponse, - TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, TenantSorting, - TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, - TopTenantShardItem, TopTenantShardsRequest, + SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters, TenantConfig, + TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, + TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest, + TenantShardSplitResponse, 
TenantSorting, TenantTimeTravelRequest, + TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateResponseStorcon, + TimelineInfo, TopTenantShardItem, TopTenantShardsRequest, }; use pageserver_api::shard::{ ShardCount, ShardIdentity, ShardNumber, ShardStripeSize, TenantShardId, @@ -49,14 +51,18 @@ use pageserver_api::upcall_api::{ }; use pageserver_client::{BlockUnblock, mgmt_api}; use reqwest::{Certificate, StatusCode}; +use safekeeper_api::membership::{MemberSet, SafekeeperId}; use safekeeper_api::models::SafekeeperUtilization; +use safekeeper_reconciler::{SafekeeperReconcilers, ScheduleRequest}; use tokio::sync::TryAcquireError; use tokio::sync::mpsc::error::TrySendError; +use tokio::task::JoinSet; use tokio_util::sync::CancellationToken; use tracing::{Instrument, debug, error, info, info_span, instrument, warn}; use utils::completion::Barrier; use utils::generation::Generation; use utils::id::{NodeId, TenantId, TimelineId}; +use utils::logging::SecretString; use utils::sync::gate::Gate; use utils::{failpoint_support, pausable_failpoint}; @@ -77,8 +83,8 @@ use crate::peer_client::GlobalObservedState; use crate::persistence::split_state::SplitState; use crate::persistence::{ AbortShardSplitStatus, ControllerPersistence, DatabaseError, DatabaseResult, - MetadataHealthPersistence, Persistence, ShardGenerationState, TenantFilter, - TenantShardPersistence, + MetadataHealthPersistence, Persistence, SafekeeperTimelineOpKind, ShardGenerationState, + TenantFilter, TenantShardPersistence, TimelinePendingOpPersistence, TimelinePersistence, }; use crate::reconciler::{ ReconcileError, ReconcileUnits, ReconcilerConfig, ReconcilerConfigBuilder, ReconcilerPriority, @@ -202,6 +208,8 @@ struct ServiceState { safekeepers: Arc>, + safekeeper_reconcilers: SafekeeperReconcilers, + scheduler: Scheduler, /// Ongoing background operation on the cluster if any is running. 
@@ -274,6 +282,7 @@ impl ServiceState { scheduler: Scheduler, delayed_reconcile_rx: tokio::sync::mpsc::Receiver, initial_leadership_status: LeadershipStatus, + reconcilers_cancel: CancellationToken, ) -> Self { metrics::update_leadership_status(initial_leadership_status); @@ -282,6 +291,7 @@ impl ServiceState { tenants, nodes: Arc::new(nodes), safekeepers: Arc::new(safekeepers), + safekeeper_reconcilers: SafekeeperReconcilers::new(reconcilers_cancel), scheduler, ongoing_operation: None, delayed_reconcile_rx, @@ -401,9 +411,12 @@ pub struct Config { pub long_reconcile_threshold: Duration, pub use_https_pageserver_api: bool, + pub use_https_safekeeper_api: bool, pub ssl_ca_cert: Option, + + pub timelines_onto_safekeepers: bool, } impl From for ApiError { @@ -742,7 +755,27 @@ impl Service { std::process::exit(1); } - self.inner.write().unwrap().become_leader(); + let safekeepers = self.inner.read().unwrap().safekeepers.clone(); + let sk_schedule_requests = + match safekeeper_reconciler::load_schedule_requests(self, &safekeepers).await { + Ok(v) => v, + Err(e) => { + tracing::warn!( + "Failed to load safekeeper pending ops at startup: {e}." // Don't abort for now: " Aborting start-up..." + ); + // std::process::exit(1); + Vec::new() + } + }; + + { + let mut locked = self.inner.write().unwrap(); + locked.become_leader(); + + locked + .safekeeper_reconcilers + .schedule_request_vec(self, sk_schedule_requests); + } // TODO: if any tenant's intent now differs from its loaded generation_pageserver, we should clear that // generation_pageserver in the database. @@ -1059,6 +1092,7 @@ impl Service { } } } + /// Heartbeat all storage nodes once in a while. 
#[instrument(skip_all)] async fn spawn_heartbeat_driver(&self) { self.startup_complete.clone().wait().await; @@ -1607,6 +1641,7 @@ impl Service { scheduler, delayed_reconcile_rx, initial_leadership_status, + reconcilers_cancel.clone(), ))), config: config.clone(), persistence, @@ -3448,7 +3483,7 @@ impl Service { Ok(()) } - pub(crate) async fn tenant_timeline_create( + pub(crate) async fn tenant_timeline_create_pageservers( &self, tenant_id: TenantId, mut create_req: TimelineCreateRequest, @@ -3459,14 +3494,6 @@ impl Service { create_req.new_timeline_id, ); - let _tenant_lock = trace_shared_lock( - &self.tenant_op_locks, - tenant_id, - TenantOperations::TimelineCreate, - ) - .await; - failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock"); - self.tenant_remote_mutation(tenant_id, move |mut targets| async move { if targets.0.is_empty() { return Err(ApiError::NotFound( @@ -3593,6 +3620,323 @@ impl Service { .await? } + /// Timeline creation on safekeepers + /// + /// Returns `Ok(left)` if the timeline has been created on a quorum of safekeepers, + /// where `left` contains the list of safekeepers that didn't have a successful response. + /// Assumes tenant lock is held while calling this function. 
+ async fn tenant_timeline_create_safekeepers_quorum( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + pg_version: u32, + timeline_persistence: &TimelinePersistence, + ) -> Result, ApiError> { + // If quorum is reached, return if we are outside of a specified timeout + let jwt = self + .config + .safekeeper_jwt_token + .clone() + .map(SecretString::from); + let mut joinset = JoinSet::new(); + + let safekeepers = { + let locked = self.inner.read().unwrap(); + locked.safekeepers.clone() + }; + + let mut members = Vec::new(); + for sk_id in timeline_persistence.sk_set.iter() { + let sk_id = NodeId(*sk_id as u64); + let Some(safekeeper) = safekeepers.get(&sk_id) else { + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "couldn't find entry for safekeeper with id {sk_id}" + )))?; + }; + members.push(SafekeeperId { + id: sk_id, + host: safekeeper.skp.host.clone(), + pg_port: safekeeper.skp.port as u16, + }); + } + let mset = MemberSet::new(members).map_err(ApiError::InternalServerError)?; + let mconf = safekeeper_api::membership::Configuration::new(mset); + + let req = safekeeper_api::models::TimelineCreateRequest { + commit_lsn: None, + mconf, + pg_version, + start_lsn: timeline_persistence.start_lsn.0, + system_id: None, + tenant_id, + timeline_id, + wal_seg_size: None, + }; + const SK_CREATE_TIMELINE_RECONCILE_TIMEOUT: Duration = Duration::from_secs(30); + for sk in timeline_persistence.sk_set.iter() { + let sk_id = NodeId(*sk as u64); + let safekeepers = safekeepers.clone(); + let jwt = jwt.clone(); + let ssl_ca_cert = self.config.ssl_ca_cert.clone(); + let req = req.clone(); + joinset.spawn(async move { + // Unwrap is fine as we already would have returned error above + let sk_p = safekeepers.get(&sk_id).unwrap(); + let res = sk_p + .with_client_retries( + |client| { + let req = req.clone(); + async move { client.create_timeline(&req).await } + }, + &jwt, + &ssl_ca_cert, + 3, + 3, + SK_CREATE_TIMELINE_RECONCILE_TIMEOUT, + 
&CancellationToken::new(), + ) + .await; + (sk_id, sk_p.skp.host.clone(), res) + }); + } + // After we have built the joinset, we now wait for the tasks to complete, + // but with a specified timeout to make sure we return swiftly, either with + // a failure or success. + let reconcile_deadline = tokio::time::Instant::now() + SK_CREATE_TIMELINE_RECONCILE_TIMEOUT; + + // Wait until all tasks finish or timeout is hit, whichever occurs + // first. + let mut reconcile_results = Vec::new(); + loop { + if let Ok(res) = tokio::time::timeout_at(reconcile_deadline, joinset.join_next()).await + { + let Some(res) = res else { break }; + match res { + Ok(res) => { + tracing::info!( + "response from safekeeper id:{} at {}: {:?}", + res.0, + res.1, + res.2 + ); + reconcile_results.push(res); + } + Err(join_err) => { + tracing::info!("join_err for task in joinset: {join_err}"); + } + } + } else { + tracing::info!( + "timeout for creation call after {} responses", + reconcile_results.len() + ); + break; + } + } + + // Now check now if quorum was reached in reconcile_results. + let total_result_count = reconcile_results.len(); + let remaining = reconcile_results + .into_iter() + .filter_map(|res| res.2.is_err().then_some(res.0)) + .collect::>(); + tracing::info!( + "Got {} non-successful responses from initial creation request of total {total_result_count} responses", + remaining.len() + ); + if remaining.len() >= 2 { + // Failure + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "not enough successful reconciliations to reach quorum, please retry: {} errored", + remaining.len() + ))); + } + + Ok(remaining) + } + + /// Create timeline in controller database and on safekeepers. + /// `timeline_info` is result of timeline creation on pageserver. + /// + /// All actions must be idempotent as the call is retried until success. 
It + /// tries to create timeline in the db and on at least majority of + /// safekeepers + queue creation for safekeepers which missed it in the db + /// for infinite retries; after that, call returns Ok. + /// + /// The idea is that once this is reached as long as we have alive majority + /// of safekeepers it is expected to get eventually operational as storcon + /// will be able to seed timeline on nodes which missed creation by making + /// pull_timeline from peers. On the other hand we don't want to fail + /// timeline creation if one safekeeper is down. + async fn tenant_timeline_create_safekeepers( + self: &Arc, + tenant_id: TenantId, + timeline_info: &TimelineInfo, + create_mode: models::TimelineCreateRequestMode, + ) -> Result { + let timeline_id = timeline_info.timeline_id; + let pg_version = timeline_info.pg_version; + // Initially start_lsn is determined by last_record_lsn in pageserver + // response as it does initdb. However, later we persist it and in sk + // creation calls replace with the value from the timeline row if it + // previously existed as on retries in theory endpoint might have + // already written some data and advanced last_record_lsn, while we want + // safekeepers to have consistent start_lsn. + let start_lsn = match create_mode { + models::TimelineCreateRequestMode::Bootstrap { .. } => timeline_info.last_record_lsn, + models::TimelineCreateRequestMode::Branch { .. } => timeline_info.last_record_lsn, + models::TimelineCreateRequestMode::ImportPgdata { .. 
} => { + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "import pgdata doesn't specify the start lsn, aborting creation on safekeepers" + )))?; + } + }; + // Choose initial set of safekeepers respecting affinity + let sks = self.safekeepers_for_new_timeline().await?; + let sks_persistence = sks.iter().map(|sk| sk.id.0 as i64).collect::>(); + // Add timeline to db + let mut timeline_persist = TimelinePersistence { + tenant_id: tenant_id.to_string(), + timeline_id: timeline_id.to_string(), + start_lsn: start_lsn.into(), + generation: 0, + sk_set: sks_persistence.clone(), + new_sk_set: None, + cplane_notified_generation: 0, + deleted_at: None, + }; + let inserted = self + .persistence + .insert_timeline(timeline_persist.clone()) + .await?; + if !inserted { + if let Some(existent_persist) = self + .persistence + .get_timeline(tenant_id, timeline_id) + .await? + { + // Replace with what we have in the db, to get stuff like the generation right. + // We do still repeat the http calls to the safekeepers. After all, we could have + // crashed right after the wrote to the DB. 
+ timeline_persist = existent_persist; + } else { + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "insertion said timeline already in db, but looking it up, it was gone" + ))); + } + } + // Create the timeline on a quorum of safekeepers + let remaining = self + .tenant_timeline_create_safekeepers_quorum( + tenant_id, + timeline_id, + pg_version, + &timeline_persist, + ) + .await?; + + // For the remaining safekeepers, take care of their reconciliation asynchronously + for &remaining_id in remaining.iter() { + let pending_op = TimelinePendingOpPersistence { + tenant_id: tenant_id.to_string(), + timeline_id: timeline_id.to_string(), + generation: timeline_persist.generation, + op_kind: crate::persistence::SafekeeperTimelineOpKind::Pull, + sk_id: remaining_id.0 as i64, + }; + tracing::info!("writing pending op for sk id {remaining_id}"); + self.persistence.insert_pending_op(pending_op).await?; + } + if !remaining.is_empty() { + let mut locked = self.inner.write().unwrap(); + for remaining_id in remaining { + let Some(sk) = locked.safekeepers.get(&remaining_id) else { + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "Couldn't find safekeeper with id {remaining_id}" + ))); + }; + let Ok(host_list) = sks + .iter() + .map(|sk| { + Ok(( + sk.id, + locked + .safekeepers + .get(&sk.id) + .ok_or_else(|| { + ApiError::InternalServerError(anyhow::anyhow!( + "Couldn't find safekeeper with id {remaining_id} to pull from" + )) + })? 
+ .base_url(), + )) + }) + .collect::>() + else { + continue; + }; + let req = ScheduleRequest { + safekeeper: Box::new(sk.clone()), + host_list, + tenant_id, + timeline_id, + generation: timeline_persist.generation as u32, + kind: crate::persistence::SafekeeperTimelineOpKind::Pull, + }; + locked.safekeeper_reconcilers.schedule_request(self, req); + } + } + + Ok(SafekeepersInfo { + generation: timeline_persist.generation as u32, + safekeepers: sks, + tenant_id, + timeline_id, + }) + } + + pub(crate) async fn tenant_timeline_create( + self: &Arc, + tenant_id: TenantId, + create_req: TimelineCreateRequest, + ) -> Result { + let safekeepers = self.config.timelines_onto_safekeepers; + tracing::info!( + %safekeepers, + "Creating timeline {}/{}", + tenant_id, + create_req.new_timeline_id, + ); + + let _tenant_lock = trace_shared_lock( + &self.tenant_op_locks, + tenant_id, + TenantOperations::TimelineCreate, + ) + .await; + failpoint_support::sleep_millis_async!("tenant-create-timeline-shared-lock"); + let create_mode = create_req.mode.clone(); + + let timeline_info = self + .tenant_timeline_create_pageservers(tenant_id, create_req) + .await?; + + let safekeepers = if safekeepers { + let res = self + .tenant_timeline_create_safekeepers(tenant_id, &timeline_info, create_mode) + .instrument(tracing::info_span!("timeline_create_safekeepers", %tenant_id, timeline_id=%timeline_info.timeline_id)) + .await?; + Some(res) + } else { + None + }; + + Ok(TimelineCreateResponseStorcon { + timeline_info, + safekeepers, + }) + } + pub(crate) async fn tenant_timeline_archival_config( &self, tenant_id: TenantId, @@ -4138,7 +4482,7 @@ impl Service { } pub(crate) async fn tenant_timeline_delete( - &self, + self: &Arc, tenant_id: TenantId, timeline_id: TimelineId, ) -> Result { @@ -4150,7 +4494,7 @@ impl Service { ) .await; - self.tenant_remote_mutation(tenant_id, move |mut targets| async move { + let status_code = self.tenant_remote_mutation(tenant_id, move |mut targets| async move { if 
targets.0.is_empty() { return Err(ApiError::NotFound( anyhow::anyhow!("Tenant not found").into(), @@ -4226,7 +4570,67 @@ impl Service { ) .await?; Ok(shard_zero_status) - }).await? + }).await?; + + self.tenant_timeline_delete_safekeepers(tenant_id, timeline_id) + .await?; + + status_code + } + /// Perform timeline deletion on safekeepers. Will return success: we persist the deletion into the reconciler. + async fn tenant_timeline_delete_safekeepers( + self: &Arc, + tenant_id: TenantId, + timeline_id: TimelineId, + ) -> Result<(), ApiError> { + let tl = self + .persistence + .get_timeline(tenant_id, timeline_id) + .await?; + let Some(tl) = tl else { + tracing::info!( + "timeline {tenant_id}/{timeline_id} doesn't exist in timelines table, no deletions on safekeepers needed" + ); + return Ok(()); + }; + let all_sks = tl + .new_sk_set + .iter() + .flat_map(|sks| { + sks.iter() + .map(|sk| (*sk, SafekeeperTimelineOpKind::Exclude)) + }) + .chain( + tl.sk_set + .iter() + .map(|v| (*v, SafekeeperTimelineOpKind::Delete)), + ) + .collect::>(); + + // Schedule reconciliations + { + let mut locked = self.inner.write().unwrap(); + for (sk_id, kind) in all_sks { + let sk_id = NodeId(sk_id as u64); + let Some(sk) = locked.safekeepers.get(&sk_id) else { + return Err(ApiError::InternalServerError(anyhow::anyhow!( + "Couldn't find safekeeper with id {sk_id}" + ))); + }; + + let req = ScheduleRequest { + safekeeper: Box::new(sk.clone()), + // we don't use this for this kind, put a dummy value + host_list: Vec::new(), + tenant_id, + timeline_id, + generation: tl.generation as u32, + kind, + }; + locked.safekeeper_reconcilers.schedule_request(self, req); + } + } + Ok(()) } /// When you know the TenantId but not a specific shard, and would like to get the node holding shard 0. @@ -8262,6 +8666,68 @@ impl Service { global_observed } + /// Choose safekeepers for the new timeline: 3 in different azs. 
+ pub(crate) async fn safekeepers_for_new_timeline( + &self, + ) -> Result, ApiError> { + let mut all_safekeepers = { + let locked = self.inner.read().unwrap(); + locked + .safekeepers + .iter() + .filter_map(|sk| { + if sk.1.scheduling_policy() != SkSchedulingPolicy::Active { + // If we don't want to schedule stuff onto the safekeeper, respect that. + return None; + } + let utilization_opt = if let SafekeeperState::Available { + last_seen_at: _, + utilization, + } = sk.1.availability() + { + Some(utilization) + } else { + // non-available safekeepers still get a chance for new timelines, + // but put them last in the list. + None + }; + let info = SafekeeperInfo { + hostname: sk.1.skp.host.clone(), + id: NodeId(sk.1.skp.id as u64), + }; + Some((utilization_opt, info, sk.1.skp.availability_zone_id.clone())) + }) + .collect::>() + }; + all_safekeepers.sort_by_key(|sk| { + ( + sk.0.as_ref() + .map(|ut| ut.timeline_count) + .unwrap_or(u64::MAX), + // Use the id to decide on equal scores for reliability + sk.1.id.0, + ) + }); + let mut sks = Vec::new(); + let mut azs = HashSet::new(); + for (_sk_util, sk_info, az_id) in all_safekeepers.iter() { + if !azs.insert(az_id) { + continue; + } + sks.push(sk_info.clone()); + if sks.len() == 3 { + break; + } + } + if sks.len() == 3 { + Ok(sks) + } else { + Err(ApiError::InternalServerError(anyhow::anyhow!( + "couldn't find three safekeepers in different AZs for new timeline" + ))) + } + } + pub(crate) async fn safekeepers_list( &self, ) -> Result, DatabaseError> { @@ -8350,6 +8816,13 @@ impl Service { .ok_or(DatabaseError::Logical("Not found".to_string()))?; sk.set_scheduling_policy(scheduling_policy); + match scheduling_policy { + SkSchedulingPolicy::Active => (), + SkSchedulingPolicy::Decomissioned | SkSchedulingPolicy::Pause => { + locked.safekeeper_reconcilers.cancel_safekeeper(node_id); + } + } + locked.safekeepers = Arc::new(safekeepers); } Ok(()) diff --git a/storage_controller/src/service/safekeeper_reconciler.rs 
b/storage_controller/src/service/safekeeper_reconciler.rs new file mode 100644 index 0000000000..4fa465c307 --- /dev/null +++ b/storage_controller/src/service/safekeeper_reconciler.rs @@ -0,0 +1,340 @@ +use std::{collections::HashMap, str::FromStr, sync::Arc, time::Duration}; + +use clashmap::{ClashMap, Entry}; +use safekeeper_api::models::PullTimelineRequest; +use safekeeper_client::mgmt_api; +use tokio::sync::mpsc::{self, UnboundedReceiver, UnboundedSender}; +use tokio_util::sync::CancellationToken; +use tracing::Instrument; +use utils::{ + id::{NodeId, TenantId, TimelineId}, + logging::SecretString, +}; + +use crate::{ + persistence::SafekeeperTimelineOpKind, safekeeper::Safekeeper, + safekeeper_client::SafekeeperClient, +}; + +use super::Service; + +pub(crate) struct SafekeeperReconcilers { + cancel: CancellationToken, + reconcilers: HashMap, +} + +impl SafekeeperReconcilers { + pub fn new(cancel: CancellationToken) -> Self { + SafekeeperReconcilers { + cancel, + reconcilers: HashMap::new(), + } + } + pub(crate) fn schedule_request_vec( + &mut self, + service: &Arc, + reqs: Vec, + ) { + for req in reqs { + self.schedule_request(service, req); + } + } + pub(crate) fn schedule_request(&mut self, service: &Arc, req: ScheduleRequest) { + let node_id = req.safekeeper.get_id(); + let reconciler_handle = self.reconcilers.entry(node_id).or_insert_with(|| { + SafekeeperReconciler::spawn(self.cancel.child_token(), service.clone()) + }); + reconciler_handle.schedule_reconcile(req); + } + pub(crate) fn cancel_safekeeper(&mut self, node_id: NodeId) { + if let Some(handle) = self.reconcilers.remove(&node_id) { + handle.cancel.cancel(); + } + } +} + +/// Initial load of the pending operations from the db +pub(crate) async fn load_schedule_requests( + service: &Arc, + safekeepers: &HashMap, +) -> anyhow::Result> { + let pending_ops = service.persistence.list_pending_ops(None).await?; + let mut res = Vec::with_capacity(pending_ops.len()); + for op_persist in pending_ops { + let 
node_id = NodeId(op_persist.sk_id as u64); + let Some(sk) = safekeepers.get(&node_id) else { + // This shouldn't happen, at least the safekeeper should exist as decomissioned. + tracing::warn!( + tenant_id = op_persist.tenant_id, + timeline_id = op_persist.timeline_id, + "couldn't find safekeeper with pending op id {node_id} in list of stored safekeepers" + ); + continue; + }; + let sk = Box::new(sk.clone()); + let tenant_id = TenantId::from_str(&op_persist.tenant_id)?; + let timeline_id = TimelineId::from_str(&op_persist.timeline_id)?; + let host_list = match op_persist.op_kind { + SafekeeperTimelineOpKind::Delete => Vec::new(), + SafekeeperTimelineOpKind::Exclude => Vec::new(), + SafekeeperTimelineOpKind::Pull => { + // TODO this code is super hacky, it doesn't take migrations into account + let timeline_persist = service + .persistence + .get_timeline(tenant_id, timeline_id) + .await?; + let Some(timeline_persist) = timeline_persist else { + // This shouldn't happen, the timeline should still exist + tracing::warn!( + tenant_id = op_persist.tenant_id, + timeline_id = op_persist.timeline_id, + "couldn't find timeline for corresponding pull op" + ); + continue; + }; + timeline_persist + .sk_set + .iter() + .filter_map(|sk_id| { + let other_node_id = NodeId(*sk_id as u64); + if node_id == other_node_id { + // We obviously don't want to pull from ourselves + return None; + } + let Some(sk) = safekeepers.get(&other_node_id) else { + tracing::warn!( + "couldnt find safekeeper with pending op id {other_node_id}, not pulling from it" + ); + return None; + }; + Some((other_node_id, sk.base_url())) + }) + .collect::>() + } + }; + let req = ScheduleRequest { + safekeeper: sk, + host_list, + tenant_id, + timeline_id, + generation: op_persist.generation as u32, + kind: op_persist.op_kind, + }; + res.push(req); + } + Ok(res) +} + +pub(crate) struct ScheduleRequest { + pub(crate) safekeeper: Box, + pub(crate) host_list: Vec<(NodeId, String)>, + pub(crate) tenant_id: TenantId, 
+ pub(crate) timeline_id: TimelineId, + pub(crate) generation: u32, + pub(crate) kind: SafekeeperTimelineOpKind, +} + +struct ReconcilerHandle { + tx: UnboundedSender<(ScheduleRequest, Arc)>, + ongoing_tokens: Arc>>, + cancel: CancellationToken, +} + +impl ReconcilerHandle { + /// Obtain a new token slot, cancelling any existing reconciliations for that timeline + fn new_token_slot( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + ) -> Arc { + let entry = self.ongoing_tokens.entry((tenant_id, timeline_id)); + if let Entry::Occupied(entry) = &entry { + let cancel: &CancellationToken = entry.get(); + cancel.cancel(); + } + entry.insert(Arc::new(self.cancel.child_token())).clone() + } + fn schedule_reconcile(&self, req: ScheduleRequest) { + let cancel = self.new_token_slot(req.tenant_id, req.timeline_id); + let hostname = req.safekeeper.skp.host.clone(); + if let Err(err) = self.tx.send((req, cancel)) { + tracing::info!("scheduling request onto {hostname} returned error: {err}"); + } + } +} + +pub(crate) struct SafekeeperReconciler { + service: Arc, + rx: UnboundedReceiver<(ScheduleRequest, Arc)>, + cancel: CancellationToken, +} + +impl SafekeeperReconciler { + fn spawn(cancel: CancellationToken, service: Arc) -> ReconcilerHandle { + // We hold the ServiceInner lock so we don't want to make sending to the reconciler channel to be blocking. + let (tx, rx) = mpsc::unbounded_channel(); + let mut reconciler = SafekeeperReconciler { + service, + rx, + cancel: cancel.clone(), + }; + let handle = ReconcilerHandle { + tx, + ongoing_tokens: Arc::new(ClashMap::new()), + cancel, + }; + tokio::spawn(async move { reconciler.run().await }); + handle + } + async fn run(&mut self) { + loop { + // TODO add parallelism with semaphore here + let req = tokio::select! 
{ + req = self.rx.recv() => req, + _ = self.cancel.cancelled() => break, + }; + let Some((req, req_cancel)) = req else { break }; + if req_cancel.is_cancelled() { + continue; + } + + let kind = req.kind; + let tenant_id = req.tenant_id; + let timeline_id = req.timeline_id; + self.reconcile_one(req, req_cancel) + .instrument(tracing::info_span!( + "reconcile_one", + ?kind, + %tenant_id, + %timeline_id + )) + .await; + } + } + async fn reconcile_one(&self, req: ScheduleRequest, req_cancel: Arc) { + let req_host = req.safekeeper.skp.host.clone(); + match req.kind { + SafekeeperTimelineOpKind::Pull => { + let our_id = req.safekeeper.get_id(); + let http_hosts = req + .host_list + .iter() + .filter(|(node_id, _hostname)| *node_id != our_id) + .map(|(_, hostname)| hostname.clone()) + .collect::>(); + let pull_req = PullTimelineRequest { + http_hosts, + tenant_id: req.tenant_id, + timeline_id: req.timeline_id, + }; + self.reconcile_inner( + req, + async |client| client.pull_timeline(&pull_req).await, + |resp| { + tracing::info!( + "pulled timeline from {} onto {req_host}", + resp.safekeeper_host, + ); + }, + req_cancel, + ) + .await; + } + SafekeeperTimelineOpKind::Exclude => { + // TODO actually exclude instead of delete here + let tenant_id = req.tenant_id; + let timeline_id = req.timeline_id; + self.reconcile_inner( + req, + async |client| client.delete_timeline(tenant_id, timeline_id).await, + |_resp| { + tracing::info!("deleted timeline from {req_host}"); + }, + req_cancel, + ) + .await; + } + SafekeeperTimelineOpKind::Delete => { + let tenant_id = req.tenant_id; + let timeline_id = req.timeline_id; + self.reconcile_inner( + req, + async |client| client.delete_timeline(tenant_id, timeline_id).await, + |_resp| { + tracing::info!("deleted timeline from {req_host}"); + }, + req_cancel, + ) + .await; + } + } + } + async fn reconcile_inner( + &self, + req: ScheduleRequest, + closure: impl Fn(SafekeeperClient) -> F, + log_success: impl FnOnce(T) -> U, + req_cancel: Arc, + 
) where + F: Future>, + { + let jwt = self + .service + .config + .safekeeper_jwt_token + .clone() + .map(SecretString::from); + let ssl_ca_cert = self.service.config.ssl_ca_cert.clone(); + loop { + let res = req + .safekeeper + .with_client_retries( + |client| { + let closure = &closure; + async move { closure(client).await } + }, + &jwt, + &ssl_ca_cert, + 3, + 10, + Duration::from_secs(10), + &req_cancel, + ) + .await; + match res { + Ok(resp) => { + log_success(resp); + let res = self + .service + .persistence + .remove_pending_op( + req.tenant_id, + req.timeline_id, + req.safekeeper.get_id(), + req.generation, + ) + .await; + if let Err(err) = res { + tracing::info!( + "couldn't remove reconciliation request onto {} from persistence: {err:?}", + req.safekeeper.skp.host + ); + } + return; + } + Err(mgmt_api::Error::Cancelled) => { + // On cancellation, the code that issued it will take care of removing db entries (if needed) + return; + } + Err(e) => { + tracing::info!( + "Reconcile attempt for safekeeper {} failed, retrying after sleep: {e:?}", + req.safekeeper.skp.host + ); + const SLEEP_TIME: Duration = Duration::from_secs(1); + tokio::time::sleep(SLEEP_TIME).await; + } + } + } + } +} From 420f7b07b477ca60c5a4850c23c649a2df25e84c Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 11 Mar 2025 08:22:56 +0100 Subject: [PATCH 140/207] add benchmark demonstrating `metrics`/`prometheus` crate multicore scalability pitfalls & workarounds (#11019) We use the `metrics` / `prometheus` crate in the Pageserver code base. This PR demonstrates - typical performance pitfalls with that crate - our current set of techniques to avoid most of these pitfalls. refs - https://github.com/neondatabase/neon/issues/10948 - https://github.com/neondatabase/neon/pull/7202 - I applied the `label_values__cache_label_values_lookup` technique there. - It didn't yield measurable results in high-level benchmarks though. 
--- pageserver/Cargo.toml | 4 + pageserver/benches/bench_metrics.rs | 366 ++++++++++++++++++++++++++++ 2 files changed, 370 insertions(+) create mode 100644 pageserver/benches/bench_metrics.rs diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 40ca1d3a33..a372be5044 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -120,6 +120,10 @@ harness = false name = "upload_queue" harness = false +[[bench]] +name = "bench_metrics" +harness = false + [[bin]] name = "test_helper_slow_client_reads" required-features = [ "testing" ] diff --git a/pageserver/benches/bench_metrics.rs b/pageserver/benches/bench_metrics.rs new file mode 100644 index 0000000000..38025124e1 --- /dev/null +++ b/pageserver/benches/bench_metrics.rs @@ -0,0 +1,366 @@ +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use utils::id::{TenantId, TimelineId}; + +// +// Demonstrates that repeat label values lookup is a multicore scalability bottleneck +// that is worth avoiding. +// +criterion_group!( + label_values, + label_values::bench_naive_usage, + label_values::bench_cache_label_values_lookup +); +mod label_values { + use super::*; + + pub fn bench_naive_usage(c: &mut Criterion) { + let mut g = c.benchmark_group("label_values__naive_usage"); + + for ntimelines in [1, 4, 8] { + g.bench_with_input( + BenchmarkId::new("ntimelines", ntimelines), + &ntimelines, + |b, ntimelines| { + b.iter_custom(|iters| { + let barrier = std::sync::Barrier::new(*ntimelines + 1); + + let timelines = (0..*ntimelines) + .map(|_| { + ( + TenantId::generate().to_string(), + "0000".to_string(), + TimelineId::generate().to_string(), + ) + }) + .collect::>(); + + let metric_vec = metrics::UIntGaugeVec::new( + metrics::opts!("testmetric", "testhelp"), + &["tenant_id", "shard_id", "timeline_id"], + ) + .unwrap(); + + std::thread::scope(|s| { + for (tenant_id, shard_id, timeline_id) in &timelines { + s.spawn(|| { + barrier.wait(); + for _ in 0..iters { + metric_vec + 
.with_label_values(&[tenant_id, shard_id, timeline_id]) + .inc(); + } + barrier.wait(); + }); + } + barrier.wait(); + let start = std::time::Instant::now(); + barrier.wait(); + start.elapsed() + }) + }) + }, + ); + } + g.finish(); + } + + pub fn bench_cache_label_values_lookup(c: &mut Criterion) { + let mut g = c.benchmark_group("label_values__cache_label_values_lookup"); + + for ntimelines in [1, 4, 8] { + g.bench_with_input( + BenchmarkId::new("ntimelines", ntimelines), + &ntimelines, + |b, ntimelines| { + b.iter_custom(|iters| { + let barrier = std::sync::Barrier::new(*ntimelines + 1); + + let timelines = (0..*ntimelines) + .map(|_| { + ( + TenantId::generate().to_string(), + "0000".to_string(), + TimelineId::generate().to_string(), + ) + }) + .collect::>(); + + let metric_vec = metrics::UIntGaugeVec::new( + metrics::opts!("testmetric", "testhelp"), + &["tenant_id", "shard_id", "timeline_id"], + ) + .unwrap(); + + std::thread::scope(|s| { + for (tenant_id, shard_id, timeline_id) in &timelines { + s.spawn(|| { + let metric = metric_vec.with_label_values(&[ + tenant_id, + shard_id, + timeline_id, + ]); + barrier.wait(); + for _ in 0..iters { + metric.inc(); + } + barrier.wait(); + }); + } + barrier.wait(); + let start = std::time::Instant::now(); + barrier.wait(); + start.elapsed() + }) + }) + }, + ); + } + g.finish(); + } +} + +// +// Demonstrates that even a single metric can be a scalability bottleneck +// if multiple threads increment it concurrently but there's nothing we can do +// about it without changing the metrics framework to use e.g. sharded counter atomics. 
+// +criterion_group!( + single_metric_multicore_scalability, + single_metric_multicore_scalability::bench, +); +mod single_metric_multicore_scalability { + use super::*; + + pub fn bench(c: &mut Criterion) { + let mut g = c.benchmark_group("single_metric_multicore_scalability"); + + for nthreads in [1, 4, 8] { + g.bench_with_input( + BenchmarkId::new("nthreads", nthreads), + &nthreads, + |b, nthreads| { + b.iter_custom(|iters| { + let barrier = std::sync::Barrier::new(*nthreads + 1); + + let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap(); + + std::thread::scope(|s| { + for _ in 0..*nthreads { + s.spawn(|| { + barrier.wait(); + for _ in 0..iters { + metric.inc(); + } + barrier.wait(); + }); + } + barrier.wait(); + let start = std::time::Instant::now(); + barrier.wait(); + start.elapsed() + }) + }) + }, + ); + } + g.finish(); + } +} + +// +// Demonstrates that even if we cache label value, the propagation of such a cached metric value +// by Clone'ing it is a scalability bottleneck. +// The reason is that it's an Arc internally and thus there's contention on the reference count atomics. +// +// We can avoid that by having long-lived references per thread (= indirection). 
+// +criterion_group!( + propagation_of_cached_label_value, + propagation_of_cached_label_value::bench_naive, + propagation_of_cached_label_value::bench_long_lived_reference_per_thread, +); +mod propagation_of_cached_label_value { + use std::sync::Arc; + + use super::*; + + pub fn bench_naive(c: &mut Criterion) { + let mut g = c.benchmark_group("propagation_of_cached_label_value__naive"); + + for nthreads in [1, 4, 8] { + g.bench_with_input( + BenchmarkId::new("nthreads", nthreads), + &nthreads, + |b, nthreads| { + b.iter_custom(|iters| { + let barrier = std::sync::Barrier::new(*nthreads + 1); + + let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap(); + + std::thread::scope(|s| { + for _ in 0..*nthreads { + s.spawn(|| { + barrier.wait(); + for _ in 0..iters { + // propagating the metric means we'd clone it into the child RequestContext + let propagated = metric.clone(); + // simulate some work + criterion::black_box(propagated); + } + barrier.wait(); + }); + } + barrier.wait(); + let start = std::time::Instant::now(); + barrier.wait(); + start.elapsed() + }) + }) + }, + ); + } + g.finish(); + } + + pub fn bench_long_lived_reference_per_thread(c: &mut Criterion) { + let mut g = + c.benchmark_group("propagation_of_cached_label_value__long_lived_reference_per_thread"); + + for nthreads in [1, 4, 8] { + g.bench_with_input( + BenchmarkId::new("nthreads", nthreads), + &nthreads, + |b, nthreads| { + b.iter_custom(|iters| { + let barrier = std::sync::Barrier::new(*nthreads + 1); + + let metric = metrics::UIntGauge::new("testmetric", "testhelp").unwrap(); + + std::thread::scope(|s| { + for _ in 0..*nthreads { + s.spawn(|| { + // This is the technique. + let this_threads_metric_reference = Arc::new(metric.clone()); + + barrier.wait(); + for _ in 0..iters { + // propagating the metric means we'd clone it into the child RequestContext + let propagated = Arc::clone(&this_threads_metric_reference); + // simulate some work (include the pointer chase!) 
+ criterion::black_box(&*propagated); + } + barrier.wait(); + }); + } + barrier.wait(); + let start = std::time::Instant::now(); + barrier.wait(); + start.elapsed() + }) + }) + }, + ); + } + } +} + +criterion_main!( + label_values, + single_metric_multicore_scalability, + propagation_of_cached_label_value +); + +/* +RUST_BACKTRACE=full cargo bench --bench bench_metrics -- --discard-baseline --noplot + +Results on an im4gn.2xlarge instance + +label_values__naive_usage/ntimelines/1 time: [178.71 ns 178.74 ns 178.76 ns] +label_values__naive_usage/ntimelines/4 time: [532.94 ns 539.59 ns 546.31 ns] +label_values__naive_usage/ntimelines/8 time: [1.1082 µs 1.1109 µs 1.1135 µs] +label_values__cache_label_values_lookup/ntimelines/1 time: [6.4116 ns 6.4119 ns 6.4123 ns] +label_values__cache_label_values_lookup/ntimelines/4 time: [6.3482 ns 6.3819 ns 6.4079 ns] +label_values__cache_label_values_lookup/ntimelines/8 time: [6.4213 ns 6.5279 ns 6.6293 ns] +single_metric_multicore_scalability/nthreads/1 time: [6.0102 ns 6.0104 ns 6.0106 ns] +single_metric_multicore_scalability/nthreads/4 time: [38.127 ns 38.275 ns 38.416 ns] +single_metric_multicore_scalability/nthreads/8 time: [73.698 ns 74.882 ns 75.864 ns] +propagation_of_cached_label_value__naive/nthreads/1 time: [14.424 ns 14.425 ns 14.426 ns] +propagation_of_cached_label_value__naive/nthreads/4 time: [100.71 ns 102.53 ns 104.35 ns] +propagation_of_cached_label_value__naive/nthreads/8 time: [211.50 ns 214.44 ns 216.87 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.135 ns 14.147 ns 14.160 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.243 ns 14.255 ns 14.268 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [14.470 ns 14.682 ns 14.895 ns] + +Results on an i3en.3xlarge instance + +label_values__naive_usage/ntimelines/1 time: [117.32 ns 117.53 ns 117.74 ns] +label_values__naive_usage/ntimelines/4 
time: [736.58 ns 741.12 ns 745.61 ns] +label_values__naive_usage/ntimelines/8 time: [1.4513 µs 1.4596 µs 1.4665 µs] +label_values__cache_label_values_lookup/ntimelines/1 time: [8.0964 ns 8.0979 ns 8.0995 ns] +label_values__cache_label_values_lookup/ntimelines/4 time: [8.1620 ns 8.2912 ns 8.4491 ns] +label_values__cache_label_values_lookup/ntimelines/8 time: [14.148 ns 14.237 ns 14.324 ns] +single_metric_multicore_scalability/nthreads/1 time: [8.0993 ns 8.1013 ns 8.1046 ns] +single_metric_multicore_scalability/nthreads/4 time: [80.039 ns 80.672 ns 81.297 ns] +single_metric_multicore_scalability/nthreads/8 time: [153.58 ns 154.23 ns 154.90 ns] +propagation_of_cached_label_value__naive/nthreads/1 time: [13.924 ns 13.926 ns 13.928 ns] +propagation_of_cached_label_value__naive/nthreads/4 time: [143.66 ns 145.27 ns 146.59 ns] +propagation_of_cached_label_value__naive/nthreads/8 time: [296.51 ns 297.90 ns 299.30 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [14.013 ns 14.149 ns 14.308 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [14.311 ns 14.625 ns 14.984 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [25.981 ns 26.227 ns 26.476 ns] + +Results on an Standard L16s v3 (16 vcpus, 128 GiB memory) Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz + +label_values__naive_usage/ntimelines/1 time: [101.63 ns 101.84 ns 102.06 ns] +label_values__naive_usage/ntimelines/4 time: [417.55 ns 424.73 ns 432.63 ns] +label_values__naive_usage/ntimelines/8 time: [874.91 ns 889.51 ns 904.25 ns] +label_values__cache_label_values_lookup/ntimelines/1 time: [5.7724 ns 5.7760 ns 5.7804 ns] +label_values__cache_label_values_lookup/ntimelines/4 time: [7.8878 ns 7.9401 ns 8.0034 ns] +label_values__cache_label_values_lookup/ntimelines/8 time: [7.2621 ns 7.6354 ns 8.0337 ns] +single_metric_multicore_scalability/nthreads/1 time: [5.7710 ns 5.7744 ns 5.7785 ns] 
+single_metric_multicore_scalability/nthreads/4 time: [66.629 ns 66.994 ns 67.336 ns] +single_metric_multicore_scalability/nthreads/8 time: [130.85 ns 131.98 ns 132.91 ns] +propagation_of_cached_label_value__naive/nthreads/1 time: [11.540 ns 11.546 ns 11.553 ns] +propagation_of_cached_label_value__naive/nthreads/4 time: [131.22 ns 131.90 ns 132.56 ns] +propagation_of_cached_label_value__naive/nthreads/8 time: [260.99 ns 262.75 ns 264.26 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [11.544 ns 11.550 ns 11.557 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [11.568 ns 11.642 ns 11.763 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [13.416 ns 14.121 ns 14.886 ns] + +Results on an M4 MAX MacBook Pro Total Number of Cores: 14 (10 performance and 4 efficiency) + +label_values__naive_usage/ntimelines/1 time: [52.711 ns 53.026 ns 53.381 ns] +label_values__naive_usage/ntimelines/4 time: [323.99 ns 330.40 ns 337.53 ns] +label_values__naive_usage/ntimelines/8 time: [1.1615 µs 1.1998 µs 1.2399 µs] +label_values__cache_label_values_lookup/ntimelines/1 time: [1.6635 ns 1.6715 ns 1.6809 ns] +label_values__cache_label_values_lookup/ntimelines/4 time: [1.7786 ns 1.7876 ns 1.8028 ns] +label_values__cache_label_values_lookup/ntimelines/8 time: [1.8195 ns 1.8371 ns 1.8665 ns] +single_metric_multicore_scalability/nthreads/1 time: [1.7764 ns 1.7909 ns 1.8079 ns] +single_metric_multicore_scalability/nthreads/4 time: [33.875 ns 34.868 ns 35.923 ns] +single_metric_multicore_scalability/nthreads/8 time: [226.85 ns 235.30 ns 244.18 ns] +propagation_of_cached_label_value__naive/nthreads/1 time: [3.4337 ns 3.4491 ns 3.4660 ns] +propagation_of_cached_label_value__naive/nthreads/4 time: [69.486 ns 71.937 ns 74.472 ns] +propagation_of_cached_label_value__naive/nthreads/8 time: [434.87 ns 456.47 ns 477.84 ns] 
+propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [3.3767 ns 3.3974 ns 3.4220 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [3.6105 ns 4.2355 ns 5.1463 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [4.0889 ns 4.9714 ns 6.0779 ns] + +Results on a Hetzner AX102 AMD Ryzen 9 7950X3D 16-Core Processor + +label_values__naive_usage/ntimelines/1 time: [64.510 ns 64.559 ns 64.610 ns] +label_values__naive_usage/ntimelines/4 time: [309.71 ns 326.09 ns 342.32 ns] +label_values__naive_usage/ntimelines/8 time: [776.92 ns 819.35 ns 856.93 ns] +label_values__cache_label_values_lookup/ntimelines/1 time: [1.2855 ns 1.2943 ns 1.3021 ns] +label_values__cache_label_values_lookup/ntimelines/4 time: [1.3865 ns 1.4139 ns 1.4441 ns] +label_values__cache_label_values_lookup/ntimelines/8 time: [1.5311 ns 1.5669 ns 1.6046 ns] +single_metric_multicore_scalability/nthreads/1 time: [1.1927 ns 1.1981 ns 1.2049 ns] +single_metric_multicore_scalability/nthreads/4 time: [24.346 ns 25.439 ns 26.634 ns] +single_metric_multicore_scalability/nthreads/8 time: [58.666 ns 60.137 ns 61.486 ns] +propagation_of_cached_label_value__naive/nthreads/1 time: [2.7067 ns 2.7238 ns 2.7402 ns] +propagation_of_cached_label_value__naive/nthreads/4 time: [62.723 ns 66.214 ns 69.787 ns] +propagation_of_cached_label_value__naive/nthreads/8 time: [164.24 ns 170.10 ns 175.68 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/1 time: [2.2915 ns 2.2960 ns 2.3012 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/4 time: [2.5726 ns 2.6158 ns 2.6624 ns] +propagation_of_cached_label_value__long_lived_reference_per_thread/nthreads/8 time: [2.7068 ns 2.8243 ns 2.9824 ns] + +*/ From 7c462b3417ecd3ae3907f3480f3b8a8c99fc6d7b Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 11 Mar 2025 08:23:06 +0100 Subject: [PATCH 141/207] impr: propagate 
VirtualFile metrics via RequestContext (#7202) # Refs - fixes https://github.com/neondatabase/neon/issues/6107 # Problem `VirtualFile` currently parses the path it is opened with to identify the `tenant,shard,timeline` labels to be used for the `STORAGE_IO_SIZE` metric. Further, for each read or write call to VirtualFile, it uses `with_label_values` to retrieve the correct metrics object, which under the hood is a global hashmap guarded by a parking_lot mutex. We perform tens of thousands of reads and writes per second on every pageserver instance; thus, doing the mutex lock + hashmap lookup is wasteful. # Changes Apply the technique we use for all other timeline-scoped metrics to avoid the repeat `with_label_values`: add it to `TimelineMetrics`. Wrap `TimelineMetrics` into an `Arc`. Propagate the `Arc` down to `VirtualFile`, and use `Timeline::metrics::storage_io_size`. To avoid contention on the `Arc`'s refcount atomics between different connection handlers for the same timeline, we wrap it into another Arc. To avoid frequent allocations, we store that `Arc<Arc<TimelineMetrics>>` inside the per-connection timeline cache. Preliminary refactorings to enable this change: - https://github.com/neondatabase/neon/pull/11001 - https://github.com/neondatabase/neon/pull/11030 # Performance I ran the benchmarks in `test_runner/performance/pageserver/pagebench/test_pageserver_max_throughput_getpage_at_latest_lsn.py` on an `i3en.3xlarge` because that's what we currently run them on. None of the benchmarks shows a meaningful difference in latency or throughput or CPU utilization. I would have expected some improvement in the many-tenants-one-client-each workload because they all hit that hashmap constantly, and clone the same `UintCounter` / `Arc` inside of it. But apparently the overhead is minuscule compared to the remaining work we do per getpage. 
Yet, since the changes are already made, the added complexity is manageable, and the perf overhead of `with_label_values` demonstrable in micro-benchmarks, let's have this change anyway. Also, propagating TimelineMetrics through RequestContext might come in handy down the line. The micro-benchmark that demonstrates perf impact of `with_label_values`, along with other pitfalls and mitigation techniques around the `metrics`/`prometheus` crate: - https://github.com/neondatabase/neon/pull/11019 # Alternative Designs An earlier iteration of this PR stored an `Arc>` inside `RequestContext`. The problem is that this risks reference cycles if the RequestContext gets stored in an object that is owned directly or indirectly by `Timeline`. Ideally, we wouldn't be using this mess of Arc's at all and propagate Rust references instead. But tokio requires tasks to be `'static`, and so, we wouldn't be able to propagate references across task boundaries, which is incompatible with any sort of fan-out code we already have (e.g. concurrent IO) or future code (parallel compaction). So, opt for Arc for now. 
--- pageserver/src/context.rs | 191 +++++++++++++++++- pageserver/src/http/routes.rs | 43 ++-- pageserver/src/metrics.rs | 53 ++++- pageserver/src/page_service.rs | 44 +++- pageserver/src/pgdatadir_mapping.rs | 2 +- pageserver/src/tenant.rs | 88 +++++--- pageserver/src/tenant/blob_io.rs | 3 +- pageserver/src/tenant/disk_btree.rs | 25 ++- pageserver/src/tenant/ephemeral_file.rs | 3 +- pageserver/src/tenant/secondary/downloader.rs | 6 +- pageserver/src/tenant/size.rs | 2 +- .../src/tenant/storage_layer/delta_layer.rs | 3 +- .../src/tenant/storage_layer/image_layer.rs | 2 +- .../src/tenant/storage_layer/layer/tests.rs | 17 +- pageserver/src/tenant/timeline.rs | 11 +- pageserver/src/tenant/timeline/delete.rs | 5 +- .../src/tenant/timeline/eviction_task.rs | 3 +- pageserver/src/tenant/vectored_blob_io.rs | 3 +- pageserver/src/virtual_file.rs | 85 ++------ 19 files changed, 432 insertions(+), 157 deletions(-) diff --git a/pageserver/src/context.rs b/pageserver/src/context.rs index da9c095a15..e2a84d0c24 100644 --- a/pageserver/src/context.rs +++ b/pageserver/src/context.rs @@ -89,16 +89,112 @@ //! [`RequestContext`] argument. Functions in the middle of the call chain //! only need to pass it on. -use crate::task_mgr::TaskKind; +use std::sync::Arc; + +use once_cell::sync::Lazy; +use tracing::warn; +use utils::{id::TimelineId, shard::TenantShardId}; + +use crate::{ + metrics::{StorageIoSizeMetrics, TimelineMetrics}, + task_mgr::TaskKind, + tenant::Timeline, +}; // The main structure of this module, see module-level comment. 
-#[derive(Debug)] pub struct RequestContext { task_kind: TaskKind, download_behavior: DownloadBehavior, access_stats_behavior: AccessStatsBehavior, page_content_kind: PageContentKind, read_path_debug: bool, + scope: Scope, +} + +#[derive(Clone)] +pub(crate) enum Scope { + Global { + io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics, + }, + SecondaryTenant { + io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics, + }, + SecondaryTimeline { + io_size_metrics: crate::metrics::StorageIoSizeMetrics, + }, + Timeline { + // We wrap the `Arc`s inside another Arc to avoid child + // context creation contending for the ref counters of the Arc, + // which are shared among all tasks that operate on the timeline, especially + // concurrent page_service connections. + #[allow(clippy::redundant_allocation)] + arc_arc: Arc>, + }, + #[cfg(test)] + UnitTest { + io_size_metrics: &'static crate::metrics::StorageIoSizeMetrics, + }, +} + +static GLOBAL_IO_SIZE_METRICS: Lazy = + Lazy::new(|| crate::metrics::StorageIoSizeMetrics::new("*", "*", "*")); + +impl Scope { + pub(crate) fn new_global() -> Self { + Scope::Global { + io_size_metrics: &GLOBAL_IO_SIZE_METRICS, + } + } + /// NB: this allocates, so, use only at relatively long-lived roots, e.g., at start + /// of a compaction iteration. + pub(crate) fn new_timeline(timeline: &Timeline) -> Self { + Scope::Timeline { + arc_arc: Arc::new(Arc::clone(&timeline.metrics)), + } + } + pub(crate) fn new_page_service_pagestream( + timeline_handle: &crate::tenant::timeline::handle::Handle< + crate::page_service::TenantManagerTypes, + >, + ) -> Self { + Scope::Timeline { + arc_arc: Arc::clone(&timeline_handle.metrics), + } + } + pub(crate) fn new_secondary_timeline( + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + ) -> Self { + // TODO(https://github.com/neondatabase/neon/issues/11156): secondary timelines have no infrastructure for metrics lifecycle. 
+ + let tenant_id = tenant_shard_id.tenant_id.to_string(); + let shard_id = tenant_shard_id.shard_slug().to_string(); + let timeline_id = timeline_id.to_string(); + + let io_size_metrics = + crate::metrics::StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id); + Scope::SecondaryTimeline { io_size_metrics } + } + pub(crate) fn new_secondary_tenant(_tenant_shard_id: &TenantShardId) -> Self { + // Before propagating metrics via RequestContext, the labels were inferred from file path. + // The only user of VirtualFile at tenant scope is the heatmap download & read. + // The inferred labels for the path of the heatmap file on local disk were that of the global metric (*,*,*). + // Thus, we do the same here, and extend that for anything secondary-tenant scoped. + // + // If we want to have (tenant_id, shard_id, '*') labels for secondary tenants in the future, + // we will need to think about the metric lifecycle, i.e., remove them during secondary tenant shutdown, + // like we do for attached timelines. (We don't have attached-tenant-scoped usage of VirtualFile + // at this point, so, we were able to completely side-step tenant-scoped stuff there). + Scope::SecondaryTenant { + io_size_metrics: &GLOBAL_IO_SIZE_METRICS, + } + } + #[cfg(test)] + pub(crate) fn new_unit_test() -> Self { + Scope::UnitTest { + io_size_metrics: &GLOBAL_IO_SIZE_METRICS, + } + } } /// The kind of access to the page cache. 
@@ -157,6 +253,7 @@ impl RequestContextBuilder { access_stats_behavior: AccessStatsBehavior::Update, page_content_kind: PageContentKind::Unknown, read_path_debug: false, + scope: Scope::new_global(), }, } } @@ -171,10 +268,16 @@ impl RequestContextBuilder { access_stats_behavior: original.access_stats_behavior, page_content_kind: original.page_content_kind, read_path_debug: original.read_path_debug, + scope: original.scope.clone(), }, } } + pub fn task_kind(mut self, k: TaskKind) -> Self { + self.inner.task_kind = k; + self + } + /// Configure the DownloadBehavior of the context: whether to /// download missing layers, and/or warn on the download. pub fn download_behavior(mut self, b: DownloadBehavior) -> Self { @@ -199,6 +302,11 @@ impl RequestContextBuilder { self } + pub(crate) fn scope(mut self, s: Scope) -> Self { + self.inner.scope = s; + self + } + pub fn build(self) -> RequestContext { self.inner } @@ -281,7 +389,50 @@ impl RequestContext { } fn child_impl(&self, task_kind: TaskKind, download_behavior: DownloadBehavior) -> Self { - Self::new(task_kind, download_behavior) + RequestContextBuilder::extend(self) + .task_kind(task_kind) + .download_behavior(download_behavior) + .build() + } + + pub fn with_scope_timeline(&self, timeline: &Arc) -> Self { + RequestContextBuilder::extend(self) + .scope(Scope::new_timeline(timeline)) + .build() + } + + pub(crate) fn with_scope_page_service_pagestream( + &self, + timeline_handle: &crate::tenant::timeline::handle::Handle< + crate::page_service::TenantManagerTypes, + >, + ) -> Self { + RequestContextBuilder::extend(self) + .scope(Scope::new_page_service_pagestream(timeline_handle)) + .build() + } + + pub fn with_scope_secondary_timeline( + &self, + tenant_shard_id: &TenantShardId, + timeline_id: &TimelineId, + ) -> Self { + RequestContextBuilder::extend(self) + .scope(Scope::new_secondary_timeline(tenant_shard_id, timeline_id)) + .build() + } + + pub fn with_scope_secondary_tenant(&self, tenant_shard_id: 
&TenantShardId) -> Self { + RequestContextBuilder::extend(self) + .scope(Scope::new_secondary_tenant(tenant_shard_id)) + .build() + } + + #[cfg(test)] + pub fn with_scope_unit_test(&self) -> Self { + RequestContextBuilder::new(TaskKind::UnitTest) + .scope(Scope::new_unit_test()) + .build() } pub fn task_kind(&self) -> TaskKind { @@ -303,4 +454,38 @@ impl RequestContext { pub(crate) fn read_path_debug(&self) -> bool { self.read_path_debug } + + pub(crate) fn io_size_metrics(&self) -> &StorageIoSizeMetrics { + match &self.scope { + Scope::Global { io_size_metrics } => { + let is_unit_test = cfg!(test); + let is_regress_test_build = cfg!(feature = "testing"); + if is_unit_test || is_regress_test_build { + panic!("all VirtualFile instances are timeline-scoped"); + } else { + use once_cell::sync::Lazy; + use std::sync::Mutex; + use std::time::Duration; + use utils::rate_limit::RateLimit; + static LIMIT: Lazy> = + Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(1)))); + let mut guard = LIMIT.lock().unwrap(); + guard.call2(|rate_limit_stats| { + warn!( + %rate_limit_stats, + backtrace=%std::backtrace::Backtrace::force_capture(), + "all VirtualFile instances are timeline-scoped", + ); + }); + + io_size_metrics + } + } + Scope::Timeline { arc_arc } => &arc_arc.storage_io_size, + Scope::SecondaryTimeline { io_size_metrics } => io_size_metrics, + Scope::SecondaryTenant { io_size_metrics } => io_size_metrics, + #[cfg(test)] + Scope::UnitTest { io_size_metrics } => io_size_metrics, + } + } } diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 3c0c23a56d..77bfab47e0 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -55,6 +55,7 @@ use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; use crate::config::PageServerConf; +use crate::context; use crate::context::{DownloadBehavior, RequestContext, RequestContextBuilder}; use crate::deletion_queue::DeletionQueueClient; use 
crate::pgdatadir_mapping::LsnForTimestamp; @@ -953,12 +954,13 @@ async fn timeline_detail_handler( tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?; let timeline = tenant.get_timeline(timeline_id, false)?; + let ctx = &ctx.with_scope_timeline(&timeline); let timeline_info = build_timeline_info( &timeline, include_non_incremental_logical_size.unwrap_or(false), force_await_initial_logical_size.unwrap_or(false), - &ctx, + ctx, ) .await .context("get local timeline info") @@ -1002,7 +1004,8 @@ async fn get_lsn_by_timestamp_handler( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); let result = timeline .find_lsn_for_timestamp(timestamp_pg, &cancel, &ctx) .await?; @@ -1074,7 +1077,8 @@ async fn get_timestamp_of_lsn_handler( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); let result = timeline.get_timestamp_for_lsn(lsn, &ctx).await?; match result { @@ -1429,7 +1433,8 @@ async fn timeline_layer_scan_disposable_keys( active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); let guard = timeline.layers.read().await; let Some(layer) = guard.try_get_from_key(&layer_name.clone().into()) else { @@ -1515,7 +1520,8 @@ async fn timeline_download_heatmap_layers_handler( let timeline = 
active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); let max_concurrency = get_config(&request) .remote_storage_config @@ -1563,7 +1569,8 @@ async fn layer_download_handler( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); let downloaded = timeline .download_layer(&layer_name, &ctx) .await @@ -2299,8 +2306,8 @@ async fn timeline_compact_handler( .unwrap_or(false); async { - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline); if scheduled { let tenant = state .tenant_manager @@ -2407,8 +2414,8 @@ async fn timeline_checkpoint_handler( parse_query_param::<_, bool>(&request, "wait_until_uploaded")?.unwrap_or(false); async { - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline); if wait_until_flushed { timeline.freeze_and_flush().await } else { @@ -2463,7 +2470,8 @@ async fn timeline_download_remote_layers_handler_post( let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id) .await?; - let ctx = 
RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download) + .with_scope_timeline(&timeline); match timeline.spawn_download_all_remote_layers(body, &ctx).await { Ok(st) => json_response(StatusCode::ACCEPTED, st), Err(st) => json_response(StatusCode::CONFLICT, st), @@ -2546,6 +2554,7 @@ async fn timeline_detach_ancestor_handler( tracing::info!("all timeline upload queues are drained"); let timeline = tenant.get_timeline(timeline_id, true)?; + let ctx = &ctx.with_scope_timeline(&timeline); let progress = timeline .prepare_to_detach_from_ancestor(&tenant, options, ctx) @@ -2652,8 +2661,9 @@ async fn getpage_at_lsn_handler_inner( async { let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); // Enable read path debugging - let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true).build(); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; + let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true) + .scope(context::Scope::new_timeline(&timeline)).build(); // Use last_record_lsn if no lsn is provided let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn()); @@ -2687,8 +2697,8 @@ async fn timeline_collect_keyspace( let at_lsn: Option = parse_query_param(&request, "at_lsn")?; async { - let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?; + let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download).with_scope_timeline(&timeline); let at_lsn = at_lsn.unwrap_or_else(|| timeline.get_last_record_lsn()); let (dense_ks, sparse_ks) = timeline .collect_keyspace(at_lsn, &ctx) @@ -3325,7 +3335,7 @@ async fn put_tenant_timeline_import_basebackup( tenant.wait_to_become_active(ACTIVE_TENANT_TIMEOUT).await?; - let timeline = 
tenant + let (timeline, timeline_ctx) = tenant .create_empty_timeline(timeline_id, base_lsn, pg_version, &ctx) .map_err(ApiError::InternalServerError) .await?; @@ -3344,7 +3354,13 @@ async fn put_tenant_timeline_import_basebackup( info!("importing basebackup"); timeline - .import_basebackup_from_tar(tenant.clone(), &mut body, base_lsn, broker_client, &ctx) + .import_basebackup_from_tar( + tenant.clone(), + &mut body, + base_lsn, + broker_client, + &timeline_ctx, + ) .await .map_err(ApiError::InternalServerError)?; @@ -3384,6 +3400,7 @@ async fn put_tenant_timeline_import_wal( let state = get_state(&request); let timeline = active_timeline_of_active_tenant(&state.tenant_manager, TenantShardId::unsharded(tenant_id), timeline_id).await?; + let ctx = RequestContextBuilder::extend(&ctx).scope(context::Scope::new_timeline(&timeline)).build(); let mut body = StreamReader::new(request.into_body().map(|res| { res.map_err(|error| { diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index b5b4e5c91f..fd90ef8cd7 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -1227,11 +1227,24 @@ impl StorageIoTime { pub(crate) static STORAGE_IO_TIME_METRIC: Lazy = Lazy::new(StorageIoTime::new); -const STORAGE_IO_SIZE_OPERATIONS: &[&str] = &["read", "write"]; +#[derive(Clone, Copy)] +#[repr(usize)] +enum StorageIoSizeOperation { + Read, + Write, +} + +impl StorageIoSizeOperation { + const VARIANTS: &'static [&'static str] = &["read", "write"]; + + fn as_str(&self) -> &'static str { + Self::VARIANTS[*self as usize] + } +} // Needed for the https://neonprod.grafana.net/d/5uK9tHL4k/picking-tenant-for-relocation?orgId=1 -pub(crate) static STORAGE_IO_SIZE: Lazy = Lazy::new(|| { - register_int_gauge_vec!( +static STORAGE_IO_SIZE: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( "pageserver_io_operations_bytes_total", "Total amount of bytes read/written in IO operations", &["operation", "tenant_id", "shard_id", "timeline_id"] @@ -1239,6 +1252,34 @@ 
pub(crate) static STORAGE_IO_SIZE: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +#[derive(Clone, Debug)] +pub(crate) struct StorageIoSizeMetrics { + pub read: UIntGauge, + pub write: UIntGauge, +} + +impl StorageIoSizeMetrics { + pub(crate) fn new(tenant_id: &str, shard_id: &str, timeline_id: &str) -> Self { + let read = STORAGE_IO_SIZE + .get_metric_with_label_values(&[ + StorageIoSizeOperation::Read.as_str(), + tenant_id, + shard_id, + timeline_id, + ]) + .unwrap(); + let write = STORAGE_IO_SIZE + .get_metric_with_label_values(&[ + StorageIoSizeOperation::Write.as_str(), + tenant_id, + shard_id, + timeline_id, + ]) + .unwrap(); + Self { read, write } + } +} + #[cfg(not(test))] pub(crate) mod virtual_file_descriptor_cache { use super::*; @@ -2821,6 +2862,7 @@ pub(crate) struct TimelineMetrics { /// Number of valid LSN leases. pub valid_lsn_lease_count_gauge: UIntGauge, pub wal_records_received: IntCounter, + pub storage_io_size: StorageIoSizeMetrics, shutdown: std::sync::atomic::AtomicBool, } @@ -2956,6 +2998,8 @@ impl TimelineMetrics { .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id); + TimelineMetrics { tenant_id, shard_id, @@ -2985,6 +3029,7 @@ impl TimelineMetrics { evictions_with_low_residence_duration: std::sync::RwLock::new( evictions_with_low_residence_duration, ), + storage_io_size, valid_lsn_lease_count_gauge, wal_records_received, shutdown: std::sync::atomic::AtomicBool::default(), @@ -3175,7 +3220,7 @@ impl TimelineMetrics { ]); } - for op in STORAGE_IO_SIZE_OPERATIONS { + for op in StorageIoSizeOperation::VARIANTS { let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]); } diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index ba2ed9dc81..f2d2ab05ad 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -56,6 +56,7 @@ use 
crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::metrics::{ self, COMPUTE_COMMANDS_COUNTERS, ComputeCommandKind, LIVE_CONNECTIONS, SmgrOpTimer, + TimelineMetrics, }; use crate::pgdatadir_mapping::Version; use crate::span::{ @@ -423,6 +424,9 @@ impl timeline::handle::Types for TenantManagerTypes { pub(crate) struct TenantManagerCacheItem { pub(crate) timeline: Arc, + // allow() for cheap propagation through RequestContext inside a task + #[allow(clippy::redundant_allocation)] + pub(crate) metrics: Arc>, #[allow(dead_code)] // we store it to keep the gate open pub(crate) gate_guard: GateGuard, } @@ -506,8 +510,11 @@ impl timeline::handle::TenantManager for TenantManagerWrappe } }; + let metrics = Arc::new(Arc::clone(&timeline.metrics)); + Ok(TenantManagerCacheItem { timeline, + metrics, gate_guard, }) } @@ -1238,6 +1245,14 @@ impl PageServerHandler { ), QueryError, > { + macro_rules! upgrade_handle_and_set_context { + ($shard:ident) => {{ + let weak_handle = &$shard; + let handle = weak_handle.upgrade()?; + let ctx = ctx.with_scope_page_service_pagestream(&handle); + (handle, ctx) + }}; + } Ok(match batch { BatchedFeMessage::Exists { span, @@ -1246,9 +1261,10 @@ impl PageServerHandler { req, } => { fail::fail_point!("ps::handle-pagerequest-message::exists"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( vec![ - self.handle_get_rel_exists_request(&*shard.upgrade()?, &req, ctx) + self.handle_get_rel_exists_request(&shard, &req, &ctx) .instrument(span.clone()) .await .map(|msg| (msg, timer)) @@ -1264,9 +1280,10 @@ impl PageServerHandler { req, } => { fail::fail_point!("ps::handle-pagerequest-message::nblocks"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( vec![ - self.handle_get_nblocks_request(&*shard.upgrade()?, &req, ctx) + self.handle_get_nblocks_request(&shard, &req, &ctx) .instrument(span.clone()) .await .map(|msg| (msg, timer)) @@ -1282,17 +1299,18 @@ impl PageServerHandler { 
pages, } => { fail::fail_point!("ps::handle-pagerequest-message::getpage"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( { let npages = pages.len(); trace!(npages, "handling getpage request"); let res = self .handle_get_page_at_lsn_request_batched( - &*shard.upgrade()?, + &shard, effective_request_lsn, pages, io_concurrency, - ctx, + &ctx, ) .instrument(span.clone()) .await; @@ -1309,9 +1327,10 @@ impl PageServerHandler { req, } => { fail::fail_point!("ps::handle-pagerequest-message::dbsize"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( vec![ - self.handle_db_size_request(&*shard.upgrade()?, &req, ctx) + self.handle_db_size_request(&shard, &req, &ctx) .instrument(span.clone()) .await .map(|msg| (msg, timer)) @@ -1327,9 +1346,10 @@ impl PageServerHandler { req, } => { fail::fail_point!("ps::handle-pagerequest-message::slrusegment"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( vec![ - self.handle_get_slru_segment_request(&*shard.upgrade()?, &req, ctx) + self.handle_get_slru_segment_request(&shard, &req, &ctx) .instrument(span.clone()) .await .map(|msg| (msg, timer)) @@ -1345,12 +1365,13 @@ impl PageServerHandler { requests, } => { fail::fail_point!("ps::handle-pagerequest-message::test"); + let (shard, ctx) = upgrade_handle_and_set_context!(shard); ( { let npages = requests.len(); trace!(npages, "handling getpage request"); let res = self - .handle_test_request_batch(&*shard.upgrade()?, requests, ctx) + .handle_test_request_batch(&shard, requests, &ctx) .instrument(span.clone()) .await; assert_eq!(res.len(), npages); @@ -2126,6 +2147,7 @@ impl PageServerHandler { .get(tenant_id, timeline_id, ShardSelector::Zero) .await?; set_tracing_field_shard_id(&timeline); + let ctx = ctx.with_scope_timeline(&timeline); if timeline.is_archived() == Some(true) { tracing::info!( @@ -2143,7 +2165,7 @@ impl PageServerHandler { lsn, crate::tenant::timeline::WaitLsnWaiter::PageService, 
crate::tenant::timeline::WaitLsnTimeout::Default, - ctx, + &ctx, ) .await?; timeline @@ -2169,7 +2191,7 @@ impl PageServerHandler { prev_lsn, full_backup, replica, - ctx, + &ctx, ) .await .map_err(map_basebackup_error)?; @@ -2192,7 +2214,7 @@ impl PageServerHandler { prev_lsn, full_backup, replica, - ctx, + &ctx, ) .await .map_err(map_basebackup_error)?; @@ -2209,7 +2231,7 @@ impl PageServerHandler { prev_lsn, full_backup, replica, - ctx, + &ctx, ) .await .map_err(map_basebackup_error)?; diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 8bcc6d58ec..4685f9383b 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -2758,7 +2758,7 @@ mod tests { TimelineId::from_array(hex!("11223344556677881122334455667788")); let (tenant, ctx) = harness.load().await; - let tline = tenant + let (tline, ctx) = tenant .create_empty_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) .await?; let tline = tline.raw_timeline().unwrap(); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index c78d15c9b5..3a34c8e254 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -77,6 +77,8 @@ use self::timeline::{ EvictionTaskTenantState, GcCutoffs, TimelineDeleteProgress, TimelineResources, WaitLsnError, }; use crate::config::PageServerConf; +use crate::context; +use crate::context::RequestContextBuilder; use crate::context::{DownloadBehavior, RequestContext}; use crate::deletion_queue::{DeletionQueueClient, DeletionQueueError}; use crate::l0_flush::L0FlushGlobalState; @@ -1114,7 +1116,7 @@ impl Tenant { } }; - let timeline = self.create_timeline_struct( + let (timeline, timeline_ctx) = self.create_timeline_struct( timeline_id, &metadata, previous_heatmap, @@ -1124,6 +1126,7 @@ impl Tenant { idempotency.clone(), index_part.gc_compaction.clone(), index_part.rel_size_migration.clone(), + ctx, )?; let disk_consistent_lsn = timeline.get_disk_consistent_lsn(); anyhow::ensure!( @@ 
-1257,7 +1260,7 @@ impl Tenant { match activate { ActivateTimelineArgs::Yes { broker_client } => { info!("activating timeline after reload from pgdata import task"); - timeline.activate(self.clone(), broker_client, None, ctx); + timeline.activate(self.clone(), broker_client, None, &timeline_ctx); } ActivateTimelineArgs::No => (), } @@ -1765,6 +1768,7 @@ impl Tenant { import_pgdata, ActivateTimelineArgs::No, guard, + ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn), )); } } @@ -1782,6 +1786,7 @@ impl Tenant { timeline_id, &index_part.metadata, remote_timeline_client, + ctx, ) .instrument(tracing::info_span!("timeline_delete", %timeline_id)) .await @@ -2219,7 +2224,7 @@ impl Tenant { self.clone(), broker_client.clone(), background_jobs_can_start, - &ctx, + &ctx.with_scope_timeline(&timeline), ); } @@ -2416,8 +2421,8 @@ impl Tenant { new_timeline_id: TimelineId, initdb_lsn: Lsn, pg_version: u32, - _ctx: &RequestContext, - ) -> anyhow::Result { + ctx: &RequestContext, + ) -> anyhow::Result<(UninitializedTimeline, RequestContext)> { anyhow::ensure!( self.is_active(), "Cannot create empty timelines on inactive tenant" @@ -2452,6 +2457,7 @@ impl Tenant { initdb_lsn, None, None, + ctx, ) .await } @@ -2469,7 +2475,7 @@ impl Tenant { pg_version: u32, ctx: &RequestContext, ) -> anyhow::Result> { - let uninit_tl = self + let (uninit_tl, ctx) = self .create_empty_timeline(new_timeline_id, initdb_lsn, pg_version, ctx) .await?; let tline = uninit_tl.raw_timeline().expect("we just created it"); @@ -2481,7 +2487,7 @@ impl Tenant { .init_empty_test_timeline() .context("init_empty_test_timeline")?; modification - .commit(ctx) + .commit(&ctx) .await .context("commit init_empty_test_timeline modification")?; @@ -2699,7 +2705,12 @@ impl Tenant { // doing stuff before the IndexPart is durable in S3, which is done by the previous section. 
let activated_timeline = match result { CreateTimelineResult::Created(timeline) => { - timeline.activate(self.clone(), broker_client, None, ctx); + timeline.activate( + self.clone(), + broker_client, + None, + &ctx.with_scope_timeline(&timeline), + ); timeline } CreateTimelineResult::Idempotent(timeline) => { @@ -2761,10 +2772,9 @@ impl Tenant { } }; - let mut uninit_timeline = { + let (mut uninit_timeline, timeline_ctx) = { let this = &self; let initdb_lsn = Lsn(0); - let _ctx = ctx; async move { let new_metadata = TimelineMetadata::new( // Initialize disk_consistent LSN to 0, The caller must import some data to @@ -2784,6 +2794,7 @@ impl Tenant { initdb_lsn, None, None, + ctx, ) .await } @@ -2813,6 +2824,7 @@ impl Tenant { index_part, activate, timeline_create_guard, + timeline_ctx.detached_child(TaskKind::ImportPgdata, DownloadBehavior::Warn), )); // NB: the timeline doesn't exist in self.timelines at this point @@ -2826,6 +2838,7 @@ impl Tenant { index_part: import_pgdata::index_part_format::Root, activate: ActivateTimelineArgs, timeline_create_guard: TimelineCreateGuard, + ctx: RequestContext, ) { debug_assert_current_span_has_tenant_and_timeline_id(); info!("starting"); @@ -2837,6 +2850,7 @@ impl Tenant { index_part, activate, timeline_create_guard, + ctx, ) .await; if let Err(err) = &res { @@ -2852,9 +2866,8 @@ impl Tenant { index_part: import_pgdata::index_part_format::Root, activate: ActivateTimelineArgs, timeline_create_guard: TimelineCreateGuard, + ctx: RequestContext, ) -> Result<(), anyhow::Error> { - let ctx = RequestContext::new(TaskKind::ImportPgdata, DownloadBehavior::Warn); - info!("importing pgdata"); import_pgdata::doit(&timeline, index_part, &ctx, self.cancel.clone()) .await @@ -3063,6 +3076,7 @@ impl Tenant { let mut has_pending_l0 = false; for timeline in compact_l0 { + let ctx = &ctx.with_scope_timeline(&timeline); let outcome = timeline .compact(cancel, CompactFlags::OnlyL0Compaction.into(), ctx) .instrument(info_span!("compact_timeline", 
timeline_id = %timeline.timeline_id)) @@ -3096,6 +3110,7 @@ impl Tenant { if !timeline.is_active() { continue; } + let ctx = &ctx.with_scope_timeline(&timeline); let mut outcome = timeline .compact(cancel, EnumSet::default(), ctx) @@ -3321,7 +3336,7 @@ impl Tenant { self.clone(), broker_client.clone(), background_jobs_can_start, - ctx, + &ctx.with_scope_timeline(timeline), ); activated_timelines += 1; } @@ -4136,7 +4151,8 @@ impl Tenant { create_idempotency: CreateTimelineIdempotency, gc_compaction_state: Option, rel_size_v2_status: Option, - ) -> anyhow::Result> { + ctx: &RequestContext, + ) -> anyhow::Result<(Arc, RequestContext)> { let state = match cause { CreateTimelineCause::Load => { let ancestor_id = new_metadata.ancestor_timeline(); @@ -4172,7 +4188,11 @@ impl Tenant { self.cancel.child_token(), ); - Ok(timeline) + let timeline_ctx = RequestContextBuilder::extend(ctx) + .scope(context::Scope::new_timeline(&timeline)) + .build(); + + Ok((timeline, timeline_ctx)) } /// [`Tenant::shutdown`] must be called before dropping the returned [`Tenant`] object @@ -4588,6 +4608,7 @@ impl Tenant { // Ensures all timelines use the same start time when computing the time cutoff. 
let now_ts_for_pitr_calc = SystemTime::now(); for timeline in timelines.iter() { + let ctx = &ctx.with_scope_timeline(timeline); let cutoff = timeline .get_last_record_lsn() .checked_sub(horizon) @@ -4761,7 +4782,7 @@ impl Tenant { src_timeline: &Arc, dst_id: TimelineId, start_lsn: Option, - _ctx: &RequestContext, + ctx: &RequestContext, ) -> Result { let src_id = src_timeline.timeline_id; @@ -4864,7 +4885,7 @@ impl Tenant { src_timeline.pg_version, ); - let uninitialized_timeline = self + let (uninitialized_timeline, _timeline_ctx) = self .prepare_new_timeline( dst_id, &metadata, @@ -4872,6 +4893,7 @@ impl Tenant { start_lsn + 1, Some(Arc::clone(src_timeline)), Some(src_timeline.get_rel_size_v2_status()), + ctx, ) .await?; @@ -5138,7 +5160,7 @@ impl Tenant { pgdata_lsn, pg_version, ); - let mut raw_timeline = self + let (mut raw_timeline, timeline_ctx) = self .prepare_new_timeline( timeline_id, &new_metadata, @@ -5146,6 +5168,7 @@ impl Tenant { pgdata_lsn, None, None, + ctx, ) .await?; @@ -5156,7 +5179,7 @@ impl Tenant { &unfinished_timeline, &pgdata_path, pgdata_lsn, - ctx, + &timeline_ctx, ) .await .with_context(|| { @@ -5217,6 +5240,7 @@ impl Tenant { /// An empty layer map is initialized, and new data and WAL can be imported starting /// at 'disk_consistent_lsn'. After any initial data has been imported, call /// `finish_creation` to insert the Timeline into the timelines map. 
+ #[allow(clippy::too_many_arguments)] async fn prepare_new_timeline<'a>( &'a self, new_timeline_id: TimelineId, @@ -5225,7 +5249,8 @@ impl Tenant { start_lsn: Lsn, ancestor: Option>, rel_size_v2_status: Option, - ) -> anyhow::Result> { + ctx: &RequestContext, + ) -> anyhow::Result<(UninitializedTimeline<'a>, RequestContext)> { let tenant_shard_id = self.tenant_shard_id; let resources = self.build_timeline_resources(new_timeline_id); @@ -5233,7 +5258,7 @@ impl Tenant { .remote_client .init_upload_queue_for_empty_remote(new_metadata, rel_size_v2_status.clone())?; - let timeline_struct = self + let (timeline_struct, timeline_ctx) = self .create_timeline_struct( new_timeline_id, new_metadata, @@ -5244,6 +5269,7 @@ impl Tenant { create_guard.idempotency.clone(), None, rel_size_v2_status, + ctx, ) .context("Failed to create timeline data structure")?; @@ -5264,10 +5290,13 @@ impl Tenant { "Successfully created initial files for timeline {tenant_shard_id}/{new_timeline_id}" ); - Ok(UninitializedTimeline::new( - self, - new_timeline_id, - Some((timeline_struct, create_guard)), + Ok(( + UninitializedTimeline::new( + self, + new_timeline_id, + Some((timeline_struct, create_guard)), + ), + timeline_ctx, )) } @@ -5802,7 +5831,8 @@ pub(crate) mod harness { } pub(crate) async fn load(&self) -> (Arc, RequestContext) { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error) + .with_scope_unit_test(); ( self.do_try_load(&ctx) .await @@ -6825,7 +6855,7 @@ mod tests { let (tenant, ctx) = harness.load().await; let io_concurrency = IoConcurrency::spawn_for_test(); - let tline = tenant + let (tline, ctx) = tenant .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx) .await?; let tline = tline.raw_timeline().unwrap(); @@ -7447,7 +7477,7 @@ mod tests { .await; let initdb_lsn = Lsn(0x20); - let utline = tenant + let (utline, ctx) = tenant .create_empty_timeline(TIMELINE_ID, 
initdb_lsn, DEFAULT_PG_VERSION, &ctx) .await?; let tline = utline.raw_timeline().unwrap(); @@ -7514,7 +7544,7 @@ mod tests { let harness = TenantHarness::create(name).await?; { let (tenant, ctx) = harness.load().await; - let tline = tenant + let (tline, _ctx) = tenant .create_empty_timeline(TIMELINE_ID, Lsn(0), DEFAULT_PG_VERSION, &ctx) .await?; // Leave the timeline ID in [`Tenant::timelines_creating`] to exclude attempting to create it again diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index b16a88eaa4..ff9a7e57b6 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -471,7 +471,8 @@ pub(crate) mod tests { blobs: &[Vec], compression: bool, ) -> Result<(), Error> { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let (_temp_dir, pathbuf, offsets) = write_maybe_compressed::(blobs, compression, &ctx).await?; diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index 73c105b34e..1791e5996c 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -32,8 +32,7 @@ use hex; use thiserror::Error; use tracing::error; -use crate::context::{DownloadBehavior, RequestContext}; -use crate::task_mgr::TaskKind; +use crate::context::RequestContext; use crate::tenant::block_io::{BlockReader, BlockWriter}; // The maximum size of a value stored in the B-tree. 5 bytes is enough currently. 
@@ -477,16 +476,15 @@ where } #[allow(dead_code)] - pub async fn dump(&self) -> Result<()> { + pub async fn dump(&self, ctx: &RequestContext) -> Result<()> { let mut stack = Vec::new(); - let ctx = RequestContext::new(TaskKind::DebugTool, DownloadBehavior::Error); stack.push((self.root_blk, String::new(), 0, 0, 0)); let block_cursor = self.reader.block_cursor(); while let Some((blknum, path, depth, child_idx, key_off)) = stack.pop() { - let blk = block_cursor.read_blk(self.start_blk + blknum, &ctx).await?; + let blk = block_cursor.read_blk(self.start_blk + blknum, ctx).await?; let buf: &[u8] = blk.as_ref(); let node = OnDiskNode::::deparse(buf)?; @@ -835,6 +833,8 @@ pub(crate) mod tests { use rand::Rng; use super::*; + use crate::context::DownloadBehavior; + use crate::task_mgr::TaskKind; use crate::tenant::block_io::{BlockCursor, BlockLease, BlockReaderRef}; #[derive(Clone, Default)] @@ -869,7 +869,8 @@ pub(crate) mod tests { let mut disk = TestDisk::new(); let mut writer = DiskBtreeBuilder::<_, 6>::new(&mut disk); - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let all_keys: Vec<&[u8; 6]> = vec![ b"xaaaaa", b"xaaaba", b"xaaaca", b"xabaaa", b"xababa", b"xabaca", b"xabada", b"xabadb", @@ -887,7 +888,7 @@ pub(crate) mod tests { let reader = DiskBtreeReader::new(0, root_offset, disk); - reader.dump().await?; + reader.dump(&ctx).await?; // Test the `get` function on all the keys. 
for (key, val) in all_data.iter() { @@ -979,7 +980,8 @@ pub(crate) mod tests { async fn lots_of_keys() -> Result<()> { let mut disk = TestDisk::new(); let mut writer = DiskBtreeBuilder::<_, 8>::new(&mut disk); - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); const NUM_KEYS: u64 = 1000; @@ -997,7 +999,7 @@ pub(crate) mod tests { let reader = DiskBtreeReader::new(0, root_offset, disk); - reader.dump().await?; + reader.dump(&ctx).await?; use std::sync::Mutex; @@ -1167,7 +1169,8 @@ pub(crate) mod tests { // Build a tree from it let mut disk = TestDisk::new(); let mut writer = DiskBtreeBuilder::<_, 26>::new(&mut disk); - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); for (key, val) in disk_btree_test_data::TEST_DATA { writer.append(&key, val)?; @@ -1198,7 +1201,7 @@ pub(crate) mod tests { .await?; assert_eq!(count, disk_btree_test_data::TEST_DATA.len()); - reader.dump().await?; + reader.dump(&ctx).await?; Ok(()) } diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index cb25fa6185..f048a355a8 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -351,7 +351,8 @@ mod tests { let timeline_id = TimelineId::from_str("22000000000000000000000000000000").unwrap(); fs::create_dir_all(conf.timeline_path(&tenant_shard_id, &timeline_id))?; - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); Ok((conf, tenant_shard_id, timeline_id, ctx)) } diff --git a/pageserver/src/tenant/secondary/downloader.rs b/pageserver/src/tenant/secondary/downloader.rs index 5f3a0932c4..1cf0241631 100644 --- 
a/pageserver/src/tenant/secondary/downloader.rs +++ b/pageserver/src/tenant/secondary/downloader.rs @@ -491,7 +491,10 @@ impl JobGenerator TenantDownloader<'a> { // Download the layers in the heatmap for timeline in heatmap.timelines { + let ctx = &ctx.with_scope_secondary_timeline(tenant_shard_id, &timeline.timeline_id); let timeline_state = timeline_states .remove(&timeline.timeline_id) .expect("Just populated above"); diff --git a/pageserver/src/tenant/size.rs b/pageserver/src/tenant/size.rs index ed6b351c75..8cc94b4e4d 100644 --- a/pageserver/src/tenant/size.rs +++ b/pageserver/src/tenant/size.rs @@ -474,7 +474,7 @@ async fn fill_logical_sizes( if cached_size.is_none() { let timeline = Arc::clone(timeline_hash.get(&timeline_id).unwrap()); let parallel_size_calcs = Arc::clone(limit); - let ctx = ctx.attached_child(); + let ctx = ctx.attached_child().with_scope_timeline(&timeline); joinset.spawn( calculate_logical_size(parallel_size_calcs, timeline, lsn, cause, ctx) .in_current_span(), diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 83ac6aab51..62adae1680 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -1334,7 +1334,7 @@ impl DeltaLayerInner { block_reader, ); - tree_reader.dump().await?; + tree_reader.dump(ctx).await?; let keys = self.index_entries(ctx).await?; @@ -1972,6 +1972,7 @@ pub(crate) mod test { .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, ctx) .await .unwrap(); + let ctx = &ctx.with_scope_timeline(&timeline); let initdb_layer = timeline .layers diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 0db9e8c845..2e6cee036c 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -199,7 +199,7 @@ impl ImageLayerInner { block_reader, ); - 
tree_reader.dump().await?; + tree_reader.dump(ctx).await?; tree_reader .visit( diff --git a/pageserver/src/tenant/storage_layer/layer/tests.rs b/pageserver/src/tenant/storage_layer/layer/tests.rs index a7f3c6b8c5..7086429bfe 100644 --- a/pageserver/src/tenant/storage_layer/layer/tests.rs +++ b/pageserver/src/tenant/storage_layer/layer/tests.rs @@ -8,7 +8,6 @@ use utils::id::TimelineId; use super::failpoints::{Failpoint, FailpointKind}; use super::*; use crate::context::DownloadBehavior; -use crate::task_mgr::TaskKind; use crate::tenant::harness::{TenantHarness, test_img}; use crate::tenant::storage_layer::{IoConcurrency, LayerVisibilityHint}; @@ -27,11 +26,9 @@ async fn smoke_test() { let h = TenantHarness::create("smoke_test").await.unwrap(); let span = h.span(); let download_span = span.in_scope(|| tracing::info_span!("downloading", timeline_id = 1)); - let (tenant, _) = h.load().await; + let (tenant, ctx) = h.load().await; let io_concurrency = IoConcurrency::spawn_for_test(); - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Download); - let image_layers = vec![( Lsn(0x40), vec![( @@ -56,6 +53,7 @@ async fn smoke_test() { ) .await .unwrap(); + let ctx = &ctx.with_scope_timeline(&timeline); // Grab one of the timeline's layers to exercise in the test, and the other layer that is just // there to avoid the timeline being illegally empty @@ -94,7 +92,7 @@ async fn smoke_test() { controlfile_keyspace.clone(), Lsn(0x10)..Lsn(0x11), &mut data, - &ctx, + ctx, ) .await .unwrap(); @@ -129,7 +127,7 @@ async fn smoke_test() { controlfile_keyspace.clone(), Lsn(0x10)..Lsn(0x11), &mut data, - &ctx, + ctx, ) .instrument(download_span.clone()) .await @@ -179,7 +177,7 @@ async fn smoke_test() { // plain downloading is rarely needed layer - .download_and_keep_resident(&ctx) + .download_and_keep_resident(ctx) .instrument(download_span) .await .unwrap(); @@ -341,6 +339,7 @@ fn read_wins_pending_eviction() { .create_test_timeline(TimelineId::generate(), 
Lsn(0x10), 14, &ctx) .await .unwrap(); + let ctx = ctx.with_scope_timeline(&timeline); let layer = { let mut layers = { @@ -473,6 +472,7 @@ fn multiple_pending_evictions_scenario(name: &'static str, in_order: bool) { .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx) .await .unwrap(); + let ctx = ctx.with_scope_timeline(&timeline); let layer = { let mut layers = { @@ -642,12 +642,12 @@ async fn cancelled_get_or_maybe_download_does_not_cancel_eviction() { .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx) .await .unwrap(); + let ctx = ctx.with_scope_timeline(&timeline); // This test does downloads let ctx = RequestContextBuilder::extend(&ctx) .download_behavior(DownloadBehavior::Download) .build(); - let layer = { let mut layers = { let layers = timeline.layers.read().await; @@ -727,6 +727,7 @@ async fn evict_and_wait_does_not_wait_for_download() { .create_test_timeline(TimelineId::generate(), Lsn(0x10), 14, &ctx) .await .unwrap(); + let ctx = ctx.with_scope_timeline(&timeline); // This test does downloads let ctx = RequestContextBuilder::extend(&ctx) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 4483ecfe94..e01c3dbd4d 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -287,7 +287,7 @@ pub struct Timeline { // The LSN of gc-compaction that was last applied to this timeline. gc_compaction_state: ArcSwap>, - pub(super) metrics: TimelineMetrics, + pub(crate) metrics: Arc, // `Timeline` doesn't write these metrics itself, but it manages the lifetime. Code // in `crate::page_service` writes these metrics. 
@@ -2685,14 +2685,14 @@ impl Timeline { } Arc::new_cyclic(|myself| { - let metrics = TimelineMetrics::new( + let metrics = Arc::new(TimelineMetrics::new( &tenant_shard_id, &timeline_id, crate::metrics::EvictionsWithLowResidenceDurationBuilder::new( "mtime", evictions_low_residence_duration_metric_threshold, ), - ); + )); let aux_file_metrics = metrics.aux_file_size_gauge.clone(); let mut result = Timeline { @@ -2876,7 +2876,7 @@ impl Timeline { "layer flush task", async move { let _guard = guard; - let background_ctx = RequestContext::todo_child(TaskKind::LayerFlushTask, DownloadBehavior::Error); + let background_ctx = RequestContext::todo_child(TaskKind::LayerFlushTask, DownloadBehavior::Error).with_scope_timeline(&self_clone); self_clone.flush_loop(layer_flush_start_rx, &background_ctx).await; let mut flush_loop_state = self_clone.flush_loop_state.lock().unwrap(); assert!(matches!(*flush_loop_state, FlushLoopState::Running{..})); @@ -7127,6 +7127,7 @@ mod tests { ) .await .unwrap(); + let ctx = &ctx.with_scope_timeline(&timeline); // Layer visibility is an input to heatmap generation, so refresh it first timeline.update_layer_visibility().await.unwrap(); @@ -7192,7 +7193,7 @@ mod tests { eprintln!("Downloading {layer} and re-generating heatmap"); - let ctx = &RequestContextBuilder::extend(&ctx) + let ctx = &RequestContextBuilder::extend(ctx) .download_behavior(crate::context::DownloadBehavior::Download) .build(); diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index c9666bb4e1..740f590735 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -11,6 +11,7 @@ use utils::id::TimelineId; use utils::{crashsafe, fs_ext, pausable_failpoint}; use crate::config::PageServerConf; +use crate::context::RequestContext; use crate::task_mgr::{self, TaskKind}; use crate::tenant::metadata::TimelineMetadata; use crate::tenant::remote_timeline_client::{ @@ -291,10 +292,11 @@ impl 
DeleteTimelineFlow { timeline_id: TimelineId, local_metadata: &TimelineMetadata, remote_client: RemoteTimelineClient, + ctx: &RequestContext, ) -> anyhow::Result<()> { // Note: here we even skip populating layer map. Timeline is essentially uninitialized. // RemoteTimelineClient is the only functioning part. - let timeline = tenant + let (timeline, _timeline_ctx) = tenant .create_timeline_struct( timeline_id, local_metadata, @@ -307,6 +309,7 @@ impl DeleteTimelineFlow { crate::tenant::CreateTimelineIdempotency::FailWithConflict, // doesn't matter what we put here None, // doesn't matter what we put here None, // doesn't matter what we put here + ctx, ) .context("create_timeline_struct")?; diff --git a/pageserver/src/tenant/timeline/eviction_task.rs b/pageserver/src/tenant/timeline/eviction_task.rs index 187d9f248e..397e8e8978 100644 --- a/pageserver/src/tenant/timeline/eviction_task.rs +++ b/pageserver/src/tenant/timeline/eviction_task.rs @@ -93,7 +93,8 @@ impl Timeline { } } - let ctx = RequestContext::new(TaskKind::Eviction, DownloadBehavior::Warn); + let ctx = RequestContext::new(TaskKind::Eviction, DownloadBehavior::Warn) + .with_scope_timeline(&self); loop { let policy = self.get_eviction_policy(); let cf = self diff --git a/pageserver/src/tenant/vectored_blob_io.rs b/pageserver/src/tenant/vectored_blob_io.rs index dcf17a376c..166917d674 100644 --- a/pageserver/src/tenant/vectored_blob_io.rs +++ b/pageserver/src/tenant/vectored_blob_io.rs @@ -961,7 +961,8 @@ mod tests { } async fn round_trip_test_compressed(blobs: &[Vec], compression: bool) -> Result<(), Error> { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let (_temp_dir, pathbuf, offsets) = write_maybe_compressed::(blobs, compression, &ctx).await?; diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index b47aecf8a6..1da3130df0 100644 --- 
a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -26,15 +26,14 @@ use owned_buffers_io::io_buf_aligned::{IoBufAligned, IoBufAlignedMut}; use owned_buffers_io::io_buf_ext::FullSlice; use pageserver_api::config::defaults::DEFAULT_IO_BUFFER_ALIGNMENT; pub use pageserver_api::models::virtual_file as api; -use pageserver_api::shard::TenantShardId; use tokio::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; use tokio::time::Instant; use tokio_epoll_uring::{BoundedBuf, IoBuf, IoBufMut, Slice}; +use crate::assert_u64_eq_usize::UsizeIsU64; use crate::context::RequestContext; -use crate::metrics::{STORAGE_IO_SIZE, STORAGE_IO_TIME_METRIC, StorageIoOperation}; +use crate::metrics::{STORAGE_IO_TIME_METRIC, StorageIoOperation}; use crate::page_cache::{PAGE_SZ, PageWriteGuard}; -use crate::tenant::TENANTS_SEGMENT_NAME; pub(crate) mod io_engine; pub use io_engine::{ FeatureTestResult as IoEngineFeatureTestResult, feature_test as io_engine_feature_test, @@ -121,7 +120,7 @@ impl VirtualFile { pub async fn open_with_options>( path: P, open_options: &OpenOptions, - ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */ + ctx: &RequestContext, ) -> Result { let inner = VirtualFileInner::open_with_options(path, open_options, ctx).await?; Ok(VirtualFile { @@ -133,7 +132,7 @@ impl VirtualFile { pub async fn open_with_options_v2>( path: P, open_options: &OpenOptions, - ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */ + ctx: &RequestContext, ) -> Result { let file = match get_io_mode() { IoMode::Buffered => { @@ -304,13 +303,6 @@ pub struct VirtualFileInner { /// storing it here. pub path: Utf8PathBuf, open_options: OpenOptions, - - // These are strings becase we only use them for metrics, and those expect strings. 
- // It makes no sense for us to constantly turn the `TimelineId` and `TenantId` into - // strings. - tenant_id: String, - shard_id: String, - timeline_id: String, } #[derive(Debug, PartialEq, Clone, Copy)] @@ -592,36 +584,16 @@ impl VirtualFileInner { pub async fn open_with_options>( path: P, open_options: &OpenOptions, - _ctx: &RequestContext, /* TODO: carry a pointer to the metrics in the RequestContext instead of the parsing https://github.com/neondatabase/neon/issues/6107 */ + _ctx: &RequestContext, ) -> Result { - let path_ref = path.as_ref(); - let path_str = path_ref.to_string(); - let parts = path_str.split('/').collect::>(); - let (tenant_id, shard_id, timeline_id) = - if parts.len() > 5 && parts[parts.len() - 5] == TENANTS_SEGMENT_NAME { - let tenant_shard_part = parts[parts.len() - 4]; - let (tenant_id, shard_id) = match tenant_shard_part.parse::() { - Ok(tenant_shard_id) => ( - tenant_shard_id.tenant_id.to_string(), - format!("{}", tenant_shard_id.shard_slug()), - ), - Err(_) => { - // Malformed path: this ID is just for observability, so tolerate it - // and pass through - (tenant_shard_part.to_string(), "*".to_string()) - } - }; - (tenant_id, shard_id, parts[parts.len() - 2].to_string()) - } else { - ("*".to_string(), "*".to_string(), "*".to_string()) - }; + let path = path.as_ref(); let (handle, mut slot_guard) = get_open_files().find_victim_slot().await; // NB: there is also StorageIoOperation::OpenAfterReplace which is for the case // where our caller doesn't get to use the returned VirtualFile before its // slot gets re-used by someone else. let file = observe_duration!(StorageIoOperation::Open, { - open_options.open(path_ref.as_std_path()).await? + open_options.open(path.as_std_path()).await? }); // Strip all options other than read and write. 
@@ -637,11 +609,8 @@ impl VirtualFileInner { let vfile = VirtualFileInner { handle: RwLock::new(handle), pos: 0, - path: path_ref.to_path_buf(), + path: path.to_owned(), open_options: reopen_options, - tenant_id, - shard_id, - timeline_id, }; // TODO: Under pressure, it's likely the slot will get re-used and @@ -944,7 +913,7 @@ impl VirtualFileInner { &self, buf: tokio_epoll_uring::Slice, offset: u64, - _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */ + ctx: &RequestContext, ) -> (tokio_epoll_uring::Slice, Result) where Buf: tokio_epoll_uring::IoBufMut + Send, @@ -962,14 +931,7 @@ impl VirtualFileInner { let ((_file_guard, buf), res) = io_engine::get().read_at(file_guard, offset, buf).await; let res = res.maybe_fatal_err("io_engine read_at inside VirtualFileInner::read_at"); if let Ok(size) = res { - STORAGE_IO_SIZE - .with_label_values(&[ - "read", - &self.tenant_id, - &self.shard_id, - &self.timeline_id, - ]) - .add(size as i64); + ctx.io_size_metrics().read.add(size.into_u64()); } (buf, res) }) @@ -980,9 +942,9 @@ impl VirtualFileInner { &self, buf: FullSlice, offset: u64, - _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */ + ctx: &RequestContext, ) -> (FullSlice, Result) { - let (slice, result) = self.write_at_inner(buf, offset, _ctx).await; + let (slice, result) = self.write_at_inner(buf, offset, ctx).await; let result = result.maybe_fatal_err("write_at"); (slice, result) } @@ -991,7 +953,7 @@ impl VirtualFileInner { &self, buf: FullSlice, offset: u64, - _ctx: &RequestContext, /* TODO: use for metrics: https://github.com/neondatabase/neon/issues/6107 */ + ctx: &RequestContext, ) -> (FullSlice, Result) { let file_guard = match self.lock_file().await { Ok(file_guard) => file_guard, @@ -1001,14 +963,7 @@ impl VirtualFileInner { let ((_file_guard, buf), result) = io_engine::get().write_at(file_guard, offset, buf).await; if let Ok(size) = result { - 
STORAGE_IO_SIZE - .with_label_values(&[ - "write", - &self.tenant_id, - &self.shard_id, - &self.timeline_id, - ]) - .add(size as i64); + ctx.io_size_metrics().write.add(size.into_u64()); } (buf, result) }) @@ -1593,7 +1548,8 @@ mod tests { where A: Adapter, { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let testdir = crate::config::PageServerConf::test_repo_dir(testname); std::fs::create_dir_all(&testdir)?; @@ -1720,7 +1676,8 @@ mod tests { const THREADS: usize = 100; const SAMPLE: [u8; SIZE] = [0xADu8; SIZE]; - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let testdir = crate::config::PageServerConf::test_repo_dir("vfile_concurrency"); std::fs::create_dir_all(&testdir)?; @@ -1779,7 +1736,8 @@ mod tests { #[tokio::test] async fn test_atomic_overwrite_basic() { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let testdir = crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_basic"); std::fs::create_dir_all(&testdir).unwrap(); @@ -1807,7 +1765,8 @@ mod tests { #[tokio::test] async fn test_atomic_overwrite_preexisting_tmp() { - let ctx = RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error); + let ctx = + RequestContext::new(TaskKind::UnitTest, DownloadBehavior::Error).with_scope_unit_test(); let testdir = crate::config::PageServerConf::test_repo_dir("test_atomic_overwrite_preexisting_tmp"); std::fs::create_dir_all(&testdir).unwrap(); From d1b60fa0b69dde210ec449062b0565cb4c1889a8 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Tue, 11 Mar 2025 10:48:50 +0000 Subject: [PATCH 142/207] fix(proxy): delete prepared statements when discarding 
(#11165) Fixes https://github.com/neondatabase/serverless/issues/144 When tables have enums, we need to perform type queries for that data. We cache these query statements for performance reasons. In Neon RLS, we run "discard all" for security reasons, which discards all the statements. When we need to type check again, the statements are no longer valid. This fixes it to discard the statements as well. I've also added some new logs and error types to monitor this. Currently we don't see the prepared statement errors in our logs. --- libs/proxy/tokio-postgres2/src/client.rs | 12 +++ proxy/src/serverless/local_conn_pool.rs | 14 +++- proxy/src/serverless/sql_over_http.rs | 95 ++++++++++++++++-------- 3 files changed, 87 insertions(+), 34 deletions(-) diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs index c70cb598de..08a06163e1 100644 --- a/libs/proxy/tokio-postgres2/src/client.rs +++ b/libs/proxy/tokio-postgres2/src/client.rs @@ -284,6 +284,18 @@ impl Client { simple_query::batch_execute(self.inner(), query).await } + pub async fn discard_all(&self) -> Result { + // clear the prepared statements that are about to be nuked from the postgres session + { + let mut typeinfo = self.inner.cached_typeinfo.lock(); + typeinfo.typeinfo = None; + typeinfo.typeinfo_composite = None; + typeinfo.typeinfo_enum = None; + } + + self.batch_execute("discard all").await + } + /// Begins a new database transaction. /// /// The transaction will roll back by default - use the `commit` method to commit it. 
diff --git a/proxy/src/serverless/local_conn_pool.rs b/proxy/src/serverless/local_conn_pool.rs index 8426a0810e..c958d077fc 100644 --- a/proxy/src/serverless/local_conn_pool.rs +++ b/proxy/src/serverless/local_conn_pool.rs @@ -35,6 +35,7 @@ use super::conn_pool_lib::{ Client, ClientDataEnum, ClientInnerCommon, ClientInnerExt, ConnInfo, DbUserConn, EndpointConnPool, }; +use super::sql_over_http::SqlOverHttpError; use crate::context::RequestContext; use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::Metrics; @@ -274,18 +275,23 @@ pub(crate) fn poll_client( } impl ClientInnerCommon { - pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), HttpConnError> { + pub(crate) async fn set_jwt_session(&mut self, payload: &[u8]) -> Result<(), SqlOverHttpError> { if let ClientDataEnum::Local(local_data) = &mut self.data { local_data.jti += 1; let token = resign_jwt(&local_data.key, payload, local_data.jti)?; - // discard all cannot run in a transaction. must be executed alone. - self.inner.batch_execute("discard all").await?; + self.inner + .discard_all() + .await + .map_err(SqlOverHttpError::InternalPostgres)?; // initiates the auth session // this is safe from query injections as the jwt format free of any escape characters. 
let query = format!("select auth.jwt_session_init('{token}')"); - self.inner.batch_execute(&query).await?; + self.inner + .batch_execute(&query) + .await + .map_err(SqlOverHttpError::InternalPostgres)?; let pid = self.inner.get_process_id(); info!(pid, jti = local_data.jti, "user session state init"); diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 93dd531f70..612702231f 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -412,8 +412,12 @@ pub(crate) enum SqlOverHttpError { ResponseTooLarge(usize), #[error("invalid isolation level")] InvalidIsolationLevel, + /// for queries our customers choose to run #[error("{0}")] - Postgres(#[from] postgres_client::Error), + Postgres(#[source] postgres_client::Error), + /// for queries we choose to run + #[error("{0}")] + InternalPostgres(#[source] postgres_client::Error), #[error("{0}")] JsonConversion(#[from] JsonConversionError), #[error("{0}")] @@ -429,6 +433,13 @@ impl ReportableError for SqlOverHttpError { SqlOverHttpError::ResponseTooLarge(_) => ErrorKind::User, SqlOverHttpError::InvalidIsolationLevel => ErrorKind::User, SqlOverHttpError::Postgres(p) => p.get_error_kind(), + SqlOverHttpError::InternalPostgres(p) => { + if p.as_db_error().is_some() { + ErrorKind::Service + } else { + ErrorKind::Compute + } + } SqlOverHttpError::JsonConversion(_) => ErrorKind::Postgres, SqlOverHttpError::Cancelled(c) => c.get_error_kind(), } @@ -444,6 +455,7 @@ impl UserFacingError for SqlOverHttpError { SqlOverHttpError::ResponseTooLarge(_) => self.to_string(), SqlOverHttpError::InvalidIsolationLevel => self.to_string(), SqlOverHttpError::Postgres(p) => p.to_string(), + SqlOverHttpError::InternalPostgres(p) => p.to_string(), SqlOverHttpError::JsonConversion(_) => "could not parse postgres response".to_string(), SqlOverHttpError::Cancelled(_) => self.to_string(), } @@ -462,6 +474,7 @@ impl HttpCodeError for SqlOverHttpError { 
SqlOverHttpError::ResponseTooLarge(_) => StatusCode::INSUFFICIENT_STORAGE, SqlOverHttpError::InvalidIsolationLevel => StatusCode::BAD_REQUEST, SqlOverHttpError::Postgres(_) => StatusCode::BAD_REQUEST, + SqlOverHttpError::InternalPostgres(_) => StatusCode::INTERNAL_SERVER_ERROR, SqlOverHttpError::JsonConversion(_) => StatusCode::INTERNAL_SERVER_ERROR, SqlOverHttpError::Cancelled(_) => StatusCode::INTERNAL_SERVER_ERROR, } @@ -671,16 +684,14 @@ async fn handle_db_inner( let authenticate_and_connect = Box::pin( async { let keys = match auth { - AuthData::Password(pw) => { - backend - .authenticate_with_password(ctx, &conn_info.user_info, &pw) - .await? - } - AuthData::Jwt(jwt) => { - backend - .authenticate_with_jwt(ctx, &conn_info.user_info, jwt) - .await? - } + AuthData::Password(pw) => backend + .authenticate_with_password(ctx, &conn_info.user_info, &pw) + .await + .map_err(HttpConnError::AuthError)?, + AuthData::Jwt(jwt) => backend + .authenticate_with_jwt(ctx, &conn_info.user_info, jwt) + .await + .map_err(HttpConnError::AuthError)?, }; let client = match keys.keys { @@ -703,7 +714,7 @@ async fn handle_db_inner( // not strictly necessary to mark success here, // but it's just insurance for if we forget it somewhere else ctx.success(); - Ok::<_, HttpConnError>(client) + Ok::<_, SqlOverHttpError>(client) } .map_err(SqlOverHttpError::from), ); @@ -933,11 +944,15 @@ impl BatchQueryData { builder = builder.deferrable(true); } - let transaction = builder.start().await.inspect_err(|_| { - // if we cannot start a transaction, we should return immediately - // and not return to the pool. connection is clearly broken - discard.discard(); - })?; + let transaction = builder + .start() + .await + .inspect_err(|_| { + // if we cannot start a transaction, we should return immediately + // and not return to the pool. 
connection is clearly broken + discard.discard(); + }) + .map_err(SqlOverHttpError::Postgres)?; let json_output = match query_batch( config, @@ -950,11 +965,15 @@ impl BatchQueryData { { Ok(json_output) => { info!("commit"); - let status = transaction.commit().await.inspect_err(|_| { - // if we cannot commit - for now don't return connection to pool - // TODO: get a query status from the error - discard.discard(); - })?; + let status = transaction + .commit() + .await + .inspect_err(|_| { + // if we cannot commit - for now don't return connection to pool + // TODO: get a query status from the error + discard.discard(); + }) + .map_err(SqlOverHttpError::Postgres)?; discard.check_idle(status); json_output } @@ -969,11 +988,15 @@ impl BatchQueryData { } Err(err) => { info!("rollback"); - let status = transaction.rollback().await.inspect_err(|_| { - // if we cannot rollback - for now don't return connection to pool - // TODO: get a query status from the error - discard.discard(); - })?; + let status = transaction + .rollback() + .await + .inspect_err(|_| { + // if we cannot rollback - for now don't return connection to pool + // TODO: get a query status from the error + discard.discard(); + }) + .map_err(SqlOverHttpError::Postgres)?; discard.check_idle(status); return Err(err); } @@ -1032,7 +1055,12 @@ async fn query_to_json( let query_start = Instant::now(); let query_params = data.params; - let mut row_stream = std::pin::pin!(client.query_raw_txt(&data.query, query_params).await?); + let mut row_stream = std::pin::pin!( + client + .query_raw_txt(&data.query, query_params) + .await + .map_err(SqlOverHttpError::Postgres)? + ); let query_acknowledged = Instant::now(); // Manually drain the stream into a vector to leave row_stream hanging @@ -1040,7 +1068,7 @@ async fn query_to_json( // big. 
let mut rows: Vec = Vec::new(); while let Some(row) = row_stream.next().await { - let row = row?; + let row = row.map_err(SqlOverHttpError::Postgres)?; *current_size += row.body_len(); rows.push(row); // we don't have a streaming response support yet so this is to prevent OOM @@ -1091,7 +1119,14 @@ async fn query_to_json( "dataTypeModifier": c.type_modifier(), "format": "text", })); - columns.push(client.get_type(c.type_oid()).await?); + + match client.get_type(c.type_oid()).await { + Ok(t) => columns.push(t), + Err(err) => { + tracing::warn!(?err, "unable to query type information"); + return Err(SqlOverHttpError::InternalPostgres(err)); + } + } } let array_mode = data.array_mode.unwrap_or(parsed_headers.default_array_mode); From f466c0199581cfdad9c953c34a1f6bb0b40de78e Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 11 Mar 2025 12:43:55 +0100 Subject: [PATCH 143/207] pageserver: add `max_logical_size_per_shard` for `get_top_tenants` (#11157) ## Problem In #11122, we want to split shards once the logical size of the largest timeline exceeds a split threshold. However, `get_top_tenants` currently only returns `max_logical_size`, which tracks the max _total_ logical size of a timeline across all shards. This is problematic, because the storage controller needs to fetch a list of N tenants that are eligible for splits, but the API doesn't currently have a way to express this. For example, with a split threshold of 1 GB, a tenant with `max_logical_size` of 4 GB is eligible to split if it has 1 or 2 shards, but not if it already has 4 shards. We need to express this in per-shard terms, otherwise the `get_top_tenants` endpoint may end up only returning tenants that can't be split, blocking splits entirely. Touches https://github.com/neondatabase/neon/pull/11122. Touches https://github.com/neondatabase/cloud/issues/22532. 
## Summary of changes Add `TenantShardItem::max_logical_size_per_shard` containing `max_logical_size / shard_count`, and `TenantSorting::MaxLogicalSizePerShard` to order and filter by it. --- libs/pageserver_api/src/models.rs | 18 +++++++++++++++--- pageserver/src/http/routes.rs | 1 + pageserver/src/tenant.rs | 5 +++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 13a9b5d89e..b1ebad83b1 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -1476,8 +1476,14 @@ pub struct TenantScanRemoteStorageResponse { #[derive(Serialize, Deserialize, Debug, Clone)] #[serde(rename_all = "snake_case")] pub enum TenantSorting { + /// Total size of layers on local disk for all timelines in a shard. ResidentSize, + /// The logical size of the largest timeline within a _tenant_ (not shard). Only tracked on + /// shard 0, contains the sum across all shards. MaxLogicalSize, + /// The logical size of the largest timeline within a _tenant_ (not shard), divided by number of + /// shards. Only tracked on shard 0, and estimates the per-shard logical size. + MaxLogicalSizePerShard, } impl Default for TenantSorting { @@ -1507,14 +1513,20 @@ pub struct TopTenantShardsRequest { pub struct TopTenantShardItem { pub id: TenantShardId, - /// Total size of layers on local disk for all timelines in this tenant + /// Total size of layers on local disk for all timelines in this shard. pub resident_size: u64, - /// Total size of layers in remote storage for all timelines in this tenant + /// Total size of layers in remote storage for all timelines in this shard. pub physical_size: u64, - /// The largest logical size of a timeline within this tenant + /// The largest logical size of a timeline within this _tenant_ (not shard). This is only + /// tracked on shard 0, and contains the sum of the logical size across all shards. 
pub max_logical_size: u64, + + /// The largest logical size of a timeline within this _tenant_ (not shard) divided by number of + /// shards. This is only tracked on shard 0, and is only an estimate as we divide it evenly by + /// shard count, rounded up. + pub max_logical_size_per_shard: u64, } #[derive(Serialize, Deserialize, Debug, Default)] diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 77bfab47e0..e5848bfd25 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -3223,6 +3223,7 @@ async fn post_top_tenants( match order_by { TenantSorting::ResidentSize => sizes.resident_size, TenantSorting::MaxLogicalSize => sizes.max_logical_size, + TenantSorting::MaxLogicalSizePerShard => sizes.max_logical_size_per_shard, } } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 3a34c8e254..62e1cdac0c 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -3842,6 +3842,7 @@ impl Tenant { resident_size: 0, physical_size: 0, max_logical_size: 0, + max_logical_size_per_shard: 0, }; for timeline in self.timelines.lock().unwrap().values() { @@ -3858,6 +3859,10 @@ impl Tenant { ); } + result.max_logical_size_per_shard = result + .max_logical_size + .div_ceil(self.tenant_shard_id.shard_count.count() as u64); + result } } From 359c64c7797887694d7d0b5745bc2f99b34ac5ac Mon Sep 17 00:00:00 2001 From: Arseny Sher Date: Tue, 11 Mar 2025 17:01:00 +0300 Subject: [PATCH 144/207] walproposer: pre generations refactoring (#11060) ## Problem https://github.com/neondatabase/neon/issues/10851 ## Summary of changes Do some refactoring before making walproposer generations aware. - Rename SS_VOTING to SS_WAIT_VOTING, SS_IDLE to SS_WAIT_ELECTED - Continue to get rid of epochs: rename GetEpoch to GetLastLogTerm, donorEpoch to donorLastLogTerm - Instead of counting n_votes, n_connected, introduce explicit WalProposerState (collecting terms / voting / elected). 
Refactor out TermsCollected and VotesCollected; they will determine state transition differently depending whether generations are enabled or not. There is no new logic in this PR and thus no new tests. --- pgxn/neon/walproposer.c | 257 ++++++++++-------- pgxn/neon/walproposer.h | 22 +- pgxn/neon/walproposer_pg.c | 2 +- .../tests/walproposer_sim/walproposer_api.rs | 7 +- 4 files changed, 157 insertions(+), 131 deletions(-) diff --git a/pgxn/neon/walproposer.c b/pgxn/neon/walproposer.c index 7ec4ec99fc..0336d63e8d 100644 --- a/pgxn/neon/walproposer.c +++ b/pgxn/neon/walproposer.c @@ -57,10 +57,11 @@ static void SendProposerGreeting(Safekeeper *sk); static void RecvAcceptorGreeting(Safekeeper *sk); static void SendVoteRequest(Safekeeper *sk); static void RecvVoteResponse(Safekeeper *sk); +static bool VotesCollected(WalProposer *wp); static void HandleElectedProposer(WalProposer *wp); static term_t GetHighestTerm(TermHistory *th); -static term_t GetEpoch(Safekeeper *sk); -static void DetermineEpochStartLsn(WalProposer *wp); +static term_t GetLastLogTerm(Safekeeper *sk); +static void ProcessPropStartPos(WalProposer *wp); static void SendProposerElected(Safekeeper *sk); static void StartStreaming(Safekeeper *sk); static void SendMessageToNode(Safekeeper *sk); @@ -97,6 +98,7 @@ WalProposerCreate(WalProposerConfig *config, walproposer_api api) wp = palloc0(sizeof(WalProposer)); wp->config = config; wp->api = api; + wp->state = WPS_COLLECTING_TERMS; wp_log(LOG, "neon.safekeepers=%s", wp->config->safekeepers_list); @@ -518,7 +520,7 @@ AdvancePollState(Safekeeper *sk, uint32 events) * nodes are transferred from SS_VOTING to sending actual vote * requests. */ - case SS_VOTING: + case SS_WAIT_VOTING: wp_log(WARNING, "EOF from node %s:%s in %s state", sk->host, sk->port, FormatSafekeeperState(sk)); ResetConnection(sk); @@ -547,7 +549,7 @@ AdvancePollState(Safekeeper *sk, uint32 events) /* * Idle state for waiting votes from quorum. 
*/ - case SS_IDLE: + case SS_WAIT_ELECTED: wp_log(WARNING, "EOF from node %s:%s in %s state", sk->host, sk->port, FormatSafekeeperState(sk)); ResetConnection(sk); @@ -721,6 +723,15 @@ SendProposerGreeting(Safekeeper *sk) BlockingWrite(sk, sk->outbuf.data, sk->outbuf.len, SS_HANDSHAKE_RECV); } +/* + * Have we received greeting from enough (quorum) safekeepers to start voting? + */ +static bool +TermsCollected(WalProposer *wp) +{ + return wp->n_connected >= wp->quorum; +} + static void RecvAcceptorGreeting(Safekeeper *sk) { @@ -754,7 +765,7 @@ RecvAcceptorGreeting(Safekeeper *sk) } /* Protocol is all good, move to voting. */ - sk->state = SS_VOTING; + sk->state = SS_WAIT_VOTING; /* * Note: it would be better to track the counter on per safekeeper basis, @@ -762,17 +773,18 @@ RecvAcceptorGreeting(Safekeeper *sk) * as is for now. */ ++wp->n_connected; - if (wp->n_connected <= wp->quorum) + if (wp->state == WPS_COLLECTING_TERMS) { /* We're still collecting terms from the majority. */ wp->propTerm = Max(sk->greetResponse.term, wp->propTerm); /* Quorum is acquried, prepare the vote request. */ - if (wp->n_connected == wp->quorum) + if (TermsCollected(wp)) { wp->propTerm++; wp_log(LOG, "proposer connected to quorum (%d) safekeepers, propTerm=" INT64_FORMAT, wp->quorum, wp->propTerm); + wp->state = WPS_CAMPAIGN; wp->voteRequest.pam.tag = 'v'; wp->voteRequest.generation = wp->mconf.generation; wp->voteRequest.term = wp->propTerm; @@ -787,12 +799,10 @@ RecvAcceptorGreeting(Safekeeper *sk) } /* - * Check if we have quorum. If there aren't enough safekeepers, wait and - * do nothing. We'll eventually get a task when the election starts. - * - * If we do have quorum, we can start an election. + * If we have quorum, start (or just send vote request to newly connected + * node) election, otherwise wait until we have more greetings. 
*/ - if (wp->n_connected < wp->quorum) + if (wp->state == WPS_COLLECTING_TERMS) { /* * SS_VOTING is an idle state; read-ready indicates the connection @@ -807,11 +817,7 @@ RecvAcceptorGreeting(Safekeeper *sk) */ for (int j = 0; j < wp->n_safekeepers; j++) { - /* - * Remember: SS_VOTING indicates that the safekeeper is - * participating in voting, but hasn't sent anything yet. - */ - if (wp->safekeeper[j].state == SS_VOTING) + if (wp->safekeeper[j].state == SS_WAIT_VOTING) SendVoteRequest(&wp->safekeeper[j]); } } @@ -838,6 +844,8 @@ RecvVoteResponse(Safekeeper *sk) { WalProposer *wp = sk->wp; + Assert(wp->state >= WPS_CAMPAIGN); + sk->voteResponse.apm.tag = 'v'; if (!AsyncReadMessage(sk, (AcceptorProposerMessage *) &sk->voteResponse)) return; @@ -856,7 +864,7 @@ RecvVoteResponse(Safekeeper *sk) * we are not elected yet and thus need the vote. */ if ((!sk->voteResponse.voteGiven) && - (sk->voteResponse.term > wp->propTerm || wp->n_votes < wp->quorum)) + (sk->voteResponse.term > wp->propTerm || wp->state == WPS_CAMPAIGN)) { wp_log(FATAL, "WAL acceptor %s:%s with term " INT64_FORMAT " rejects our connection request with term " INT64_FORMAT "", sk->host, sk->port, @@ -864,38 +872,83 @@ RecvVoteResponse(Safekeeper *sk) } Assert(sk->voteResponse.term == wp->propTerm); - /* Handshake completed, do we have quorum? */ + /* ready for elected message */ + sk->state = SS_WAIT_ELECTED; + wp->n_votes++; - if (wp->n_votes < wp->quorum) + /* Are we already elected? 
*/ + if (wp->state == WPS_CAMPAIGN) { - sk->state = SS_IDLE; /* can't do much yet, no quorum */ - } - else if (wp->n_votes > wp->quorum) - { - /* already elected, start streaming */ - SendProposerElected(sk); + /* no; check if this vote makes us elected */ + if (VotesCollected(wp)) + { + wp->state = WPS_ELECTED; + HandleElectedProposer(wp); + } + else + { + /* can't do much yet, no quorum */ + return; + } } else { - sk->state = SS_IDLE; - /* Idle state waits for read-ready events */ - wp->api.update_event_set(sk, WL_SOCKET_READABLE); - - HandleElectedProposer(sk->wp); + Assert(wp->state == WPS_ELECTED); + /* send elected only to this sk */ + SendProposerElected(sk); } } +/* + * Checks if enough votes has been collected to get elected and if that's the + * case finds the highest vote, setting donor, donorLastLogTerm, + * propTermStartLsn fields. Also sets truncateLsn. + */ +static bool +VotesCollected(WalProposer *wp) +{ + int n_ready = 0; + + /* assumed to be called only when not elected yet */ + Assert(wp->state == WPS_CAMPAIGN); + + wp->propTermStartLsn = InvalidXLogRecPtr; + wp->donorLastLogTerm = 0; + wp->truncateLsn = InvalidXLogRecPtr; + + for (int i = 0; i < wp->n_safekeepers; i++) + { + if (wp->safekeeper[i].state == SS_WAIT_ELECTED) + { + n_ready++; + + if (GetLastLogTerm(&wp->safekeeper[i]) > wp->donorLastLogTerm || + (GetLastLogTerm(&wp->safekeeper[i]) == wp->donorLastLogTerm && + wp->safekeeper[i].voteResponse.flushLsn > wp->propTermStartLsn)) + { + wp->donorLastLogTerm = GetLastLogTerm(&wp->safekeeper[i]); + wp->propTermStartLsn = wp->safekeeper[i].voteResponse.flushLsn; + wp->donor = i; + } + wp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn); + } + } + + return n_ready >= wp->quorum; +} + /* * Called once a majority of acceptors have voted for us and current proposer * has been elected. 
* - * Sends ProposerElected message to all acceptors in SS_IDLE state and starts + * Sends ProposerElected message to all acceptors in SS_WAIT_ELECTED state and starts * replication from walsender. */ static void HandleElectedProposer(WalProposer *wp) { - DetermineEpochStartLsn(wp); + ProcessPropStartPos(wp); + Assert(wp->propTermStartLsn != InvalidXLogRecPtr); /* * Synchronously download WAL from the most advanced safekeeper. We do @@ -907,40 +960,24 @@ HandleElectedProposer(WalProposer *wp) wp_log(FATAL, "failed to download WAL for logical replicaiton"); } - /* - * Zero propEpochStartLsn means majority of safekeepers doesn't have any - * WAL, timeline was just created. Compute bumps it to basebackup LSN, - * otherwise we must be sync-safekeepers and we have nothing to do then. - * - * Proceeding is not only pointless but harmful, because we'd give - * safekeepers term history starting with 0/0. These hacks will go away - * once we disable implicit timeline creation on safekeepers and create it - * with non zero LSN from the start. - */ - if (wp->propEpochStartLsn == InvalidXLogRecPtr) - { - Assert(wp->config->syncSafekeepers); - wp_log(LOG, "elected with zero propEpochStartLsn in sync-safekeepers, exiting"); - wp->api.finish_sync_safekeepers(wp, wp->propEpochStartLsn); - } - - if (wp->truncateLsn == wp->propEpochStartLsn && wp->config->syncSafekeepers) + if (wp->truncateLsn == wp->propTermStartLsn && wp->config->syncSafekeepers) { /* Sync is not needed: just exit */ - wp->api.finish_sync_safekeepers(wp, wp->propEpochStartLsn); + wp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn); /* unreachable */ } for (int i = 0; i < wp->n_safekeepers; i++) { - if (wp->safekeeper[i].state == SS_IDLE) + if (wp->safekeeper[i].state == SS_WAIT_ELECTED) SendProposerElected(&wp->safekeeper[i]); } /* * The proposer has been elected, and there will be no quorum waiting - * after this point. 
There will be no safekeeper with state SS_IDLE also, - * because that state is used only for quorum waiting. + * after this point. There will be no safekeeper with state + * SS_WAIT_ELECTED also, because that state is used only for quorum + * waiting. */ if (wp->config->syncSafekeepers) @@ -957,7 +994,7 @@ HandleElectedProposer(WalProposer *wp) return; } - wp->api.start_streaming(wp, wp->propEpochStartLsn); + wp->api.start_streaming(wp, wp->propTermStartLsn); /* Should not return here */ } @@ -970,7 +1007,7 @@ GetHighestTerm(TermHistory *th) /* safekeeper's epoch is the term of the highest entry in the log */ static term_t -GetEpoch(Safekeeper *sk) +GetLastLogTerm(Safekeeper *sk) { return GetHighestTerm(&sk->voteResponse.termHistory); } @@ -991,72 +1028,52 @@ SkipXLogPageHeader(WalProposer *wp, XLogRecPtr lsn) } /* - * Called after majority of acceptors gave votes, it calculates the most - * advanced safekeeper (who will be the donor) and epochStartLsn -- LSN since - * which we'll write WAL in our term. - * - * Sets truncateLsn along the way (though it is not of much use at this point -- - * only for skipping recovery). + * Called after quorum gave votes and proposer starting position (highest vote + * term + flush LSN) -- is determined (VotesCollected true), this function + * adopts it: pushes LSN to shmem, sets wp term history, verifies that the + * basebackup matches. 
*/ static void -DetermineEpochStartLsn(WalProposer *wp) +ProcessPropStartPos(WalProposer *wp) { TermHistory *dth; - int n_ready = 0; WalproposerShmemState *walprop_shared; - wp->propEpochStartLsn = InvalidXLogRecPtr; - wp->donorEpoch = 0; - wp->truncateLsn = InvalidXLogRecPtr; - - for (int i = 0; i < wp->n_safekeepers; i++) - { - if (wp->safekeeper[i].state == SS_IDLE) - { - n_ready++; - - if (GetEpoch(&wp->safekeeper[i]) > wp->donorEpoch || - (GetEpoch(&wp->safekeeper[i]) == wp->donorEpoch && - wp->safekeeper[i].voteResponse.flushLsn > wp->propEpochStartLsn)) - { - wp->donorEpoch = GetEpoch(&wp->safekeeper[i]); - wp->propEpochStartLsn = wp->safekeeper[i].voteResponse.flushLsn; - wp->donor = i; - } - wp->truncateLsn = Max(wp->safekeeper[i].voteResponse.truncateLsn, wp->truncateLsn); - } - } - - if (n_ready < wp->quorum) - { - /* - * This is a rare case that can be triggered if safekeeper has voted - * and disconnected. In this case, its state will not be SS_IDLE and - * its vote cannot be used, because we clean up `voteResponse` in - * `ShutdownConnection`. - */ - wp_log(FATAL, "missing majority of votes, collected %d, expected %d, got %d", wp->n_votes, wp->quorum, n_ready); - } + /* must have collected votes */ + Assert(wp->state == WPS_ELECTED); /* - * If propEpochStartLsn is 0, it means flushLsn is 0 everywhere, we are - * bootstrapping and nothing was committed yet. Start streaming then from - * the basebackup LSN. + * If propTermStartLsn is 0, it means flushLsn is 0 everywhere, we are + * bootstrapping and nothing was committed yet. Start streaming from the + * basebackup LSN then. + * + * In case of sync-safekeepers just exit: proceeding is not only pointless + * but harmful, because we'd give safekeepers term history starting with + * 0/0. These hacks will go away once we disable implicit timeline + * creation on safekeepers and create it with non zero LSN from the start. 
*/ - if (wp->propEpochStartLsn == InvalidXLogRecPtr && !wp->config->syncSafekeepers) + if (wp->propTermStartLsn == InvalidXLogRecPtr) { - wp->propEpochStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn(wp); - wp_log(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propEpochStartLsn)); + if (!wp->config->syncSafekeepers) + { + wp->propTermStartLsn = wp->truncateLsn = wp->api.get_redo_start_lsn(wp); + wp_log(LOG, "bumped epochStartLsn to the first record %X/%X", LSN_FORMAT_ARGS(wp->propTermStartLsn)); + } + else + { + wp_log(LOG, "elected with zero propTermStartLsn in sync-safekeepers, exiting"); + wp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn); + } } - pg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propEpochStartLsn); + pg_atomic_write_u64(&wp->api.get_shmem_state(wp)->propEpochStartLsn, wp->propTermStartLsn); Assert(wp->truncateLsn != InvalidXLogRecPtr || wp->config->syncSafekeepers); /* - * We will be generating WAL since propEpochStartLsn, so we should set + * We will be generating WAL since propTermStartLsn, so we should set * availableLsn to mark this LSN as the latest available position. */ - wp->availableLsn = wp->propEpochStartLsn; + wp->availableLsn = wp->propTermStartLsn; /* * Proposer's term history is the donor's + its own entry. 
@@ -1067,12 +1084,12 @@ DetermineEpochStartLsn(WalProposer *wp) if (dth->n_entries > 0) memcpy(wp->propTermHistory.entries, dth->entries, sizeof(TermSwitchEntry) * dth->n_entries); wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].term = wp->propTerm; - wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propEpochStartLsn; + wp->propTermHistory.entries[wp->propTermHistory.n_entries - 1].lsn = wp->propTermStartLsn; wp_log(LOG, "got votes from majority (%d) of nodes, term " UINT64_FORMAT ", epochStartLsn %X/%X, donor %s:%s, truncate_lsn %X/%X", wp->quorum, wp->propTerm, - LSN_FORMAT_ARGS(wp->propEpochStartLsn), + LSN_FORMAT_ARGS(wp->propTermStartLsn), wp->safekeeper[wp->donor].host, wp->safekeeper[wp->donor].port, LSN_FORMAT_ARGS(wp->truncateLsn)); @@ -1090,7 +1107,7 @@ DetermineEpochStartLsn(WalProposer *wp) * Safekeepers don't skip header as they need continious stream of * data, so correct LSN for comparison. */ - if (SkipXLogPageHeader(wp, wp->propEpochStartLsn) != wp->api.get_redo_start_lsn(wp)) + if (SkipXLogPageHeader(wp, wp->propTermStartLsn) != wp->api.get_redo_start_lsn(wp)) { /* * However, allow to proceed if last_log_term on the node which @@ -1111,8 +1128,8 @@ DetermineEpochStartLsn(WalProposer *wp) */ disable_core_dump(); wp_log(PANIC, - "collected propEpochStartLsn %X/%X, but basebackup LSN %X/%X", - LSN_FORMAT_ARGS(wp->propEpochStartLsn), + "collected propTermStartLsn %X/%X, but basebackup LSN %X/%X", + LSN_FORMAT_ARGS(wp->propTermStartLsn), LSN_FORMAT_ARGS(wp->api.get_redo_start_lsn(wp))); } } @@ -1623,7 +1640,7 @@ GetAcknowledgedByQuorumWALPosition(WalProposer *wp) * Like in Raft, we aren't allowed to commit entries from previous * terms, so ignore reported LSN until it gets to epochStartLsn. */ - responses[i] = wp->safekeeper[i].appendResponse.flushLsn >= wp->propEpochStartLsn ? wp->safekeeper[i].appendResponse.flushLsn : 0; + responses[i] = wp->safekeeper[i].appendResponse.flushLsn >= wp->propTermStartLsn ? 
wp->safekeeper[i].appendResponse.flushLsn : 0; } qsort(responses, wp->n_safekeepers, sizeof(XLogRecPtr), CompareLsn); @@ -1656,10 +1673,10 @@ UpdateDonorShmem(WalProposer *wp) * about its position immediately after election before any feedbacks are * sent. */ - if (wp->safekeeper[wp->donor].state >= SS_IDLE) + if (wp->safekeeper[wp->donor].state >= SS_WAIT_ELECTED) { donor = &wp->safekeeper[wp->donor]; - donor_lsn = wp->propEpochStartLsn; + donor_lsn = wp->propTermStartLsn; } /* @@ -1748,7 +1765,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk) for (int i = 0; i < wp->n_safekeepers; i++) { Safekeeper *sk = &wp->safekeeper[i]; - bool synced = sk->appendResponse.commitLsn >= wp->propEpochStartLsn; + bool synced = sk->appendResponse.commitLsn >= wp->propTermStartLsn; /* alive safekeeper which is not synced yet; wait for it */ if (sk->state != SS_OFFLINE && !synced) @@ -1772,7 +1789,7 @@ HandleSafekeeperResponse(WalProposer *wp, Safekeeper *fromsk) */ BroadcastAppendRequest(wp); - wp->api.finish_sync_safekeepers(wp, wp->propEpochStartLsn); + wp->api.finish_sync_safekeepers(wp, wp->propTermStartLsn); /* unreachable */ } } @@ -2378,7 +2395,7 @@ FormatSafekeeperState(Safekeeper *sk) case SS_HANDSHAKE_RECV: return_val = "handshake (receiving)"; break; - case SS_VOTING: + case SS_WAIT_VOTING: return_val = "voting"; break; case SS_WAIT_VERDICT: @@ -2387,7 +2404,7 @@ FormatSafekeeperState(Safekeeper *sk) case SS_SEND_ELECTED_FLUSH: return_val = "send-announcement-flush"; break; - case SS_IDLE: + case SS_WAIT_ELECTED: return_val = "idle"; break; case SS_ACTIVE: @@ -2476,8 +2493,8 @@ SafekeeperStateDesiredEvents(Safekeeper *sk, uint32 *sk_events, uint32 *nwr_even * Idle states use read-readiness as a sign that the connection * has been disconnected. 
*/ - case SS_VOTING: - case SS_IDLE: + case SS_WAIT_VOTING: + case SS_WAIT_ELECTED: *sk_events = WL_SOCKET_READABLE; return; diff --git a/pgxn/neon/walproposer.h b/pgxn/neon/walproposer.h index 8d1ae26cac..d116bce806 100644 --- a/pgxn/neon/walproposer.h +++ b/pgxn/neon/walproposer.h @@ -73,12 +73,12 @@ typedef enum * Moved externally by execution of SS_HANDSHAKE_RECV, when we received a * quorum of handshakes. */ - SS_VOTING, + SS_WAIT_VOTING, /* * Already sent voting information, waiting to receive confirmation from - * the node. After receiving, moves to SS_IDLE, if the quorum isn't - * reached yet. + * the node. After receiving, moves to SS_WAIT_ELECTED, if the quorum + * isn't reached yet. */ SS_WAIT_VERDICT, @@ -91,7 +91,7 @@ typedef enum * * Moves to SS_ACTIVE only by call to StartStreaming. */ - SS_IDLE, + SS_WAIT_ELECTED, /* * Active phase, when we acquired quorum and have WAL to send or feedback @@ -751,6 +751,15 @@ typedef struct WalProposerConfig #endif } WalProposerConfig; +typedef enum +{ + /* collecting greetings to determine term to campaign for */ + WPS_COLLECTING_TERMS, + /* campaing started, waiting for votes */ + WPS_CAMPAIGN, + /* successfully elected */ + WPS_ELECTED, +} WalProposerState; /* * WAL proposer state. 
@@ -758,6 +767,7 @@ typedef struct WalProposerConfig typedef struct WalProposer { WalProposerConfig *config; + WalProposerState state; /* Current walproposer membership configuration */ MembershipConfiguration mconf; @@ -813,10 +823,10 @@ typedef struct WalProposer TermHistory propTermHistory; /* epoch start lsn of the proposer */ - XLogRecPtr propEpochStartLsn; + XLogRecPtr propTermStartLsn; /* Most advanced acceptor epoch */ - term_t donorEpoch; + term_t donorLastLogTerm; /* Most advanced acceptor */ int donor; diff --git a/pgxn/neon/walproposer_pg.c b/pgxn/neon/walproposer_pg.c index b21184de57..9c34c90002 100644 --- a/pgxn/neon/walproposer_pg.c +++ b/pgxn/neon/walproposer_pg.c @@ -1496,7 +1496,7 @@ walprop_pg_wal_reader_allocate(Safekeeper *sk) snprintf(log_prefix, sizeof(log_prefix), WP_LOG_PREFIX "sk %s:%s nwr: ", sk->host, sk->port); Assert(!sk->xlogreader); - sk->xlogreader = NeonWALReaderAllocate(wal_segment_size, sk->wp->propEpochStartLsn, log_prefix); + sk->xlogreader = NeonWALReaderAllocate(wal_segment_size, sk->wp->propTermStartLsn, log_prefix); if (sk->xlogreader == NULL) wpg_log(FATAL, "failed to allocate xlog reader"); } diff --git a/safekeeper/tests/walproposer_sim/walproposer_api.rs b/safekeeper/tests/walproposer_sim/walproposer_api.rs index 6451589e80..82e7a32881 100644 --- a/safekeeper/tests/walproposer_sim/walproposer_api.rs +++ b/safekeeper/tests/walproposer_sim/walproposer_api.rs @@ -511,8 +511,7 @@ impl ApiImpl for SimulationApi { // collected quorum with lower term, then got rejected by next connected safekeeper executor::exit(1, msg.to_owned()); } - if msg.contains("collected propEpochStartLsn") && msg.contains(", but basebackup LSN ") - { + if msg.contains("collected propTermStartLsn") && msg.contains(", but basebackup LSN ") { // sync-safekeepers collected wrong quorum, walproposer collected another quorum executor::exit(1, msg.to_owned()); } @@ -529,7 +528,7 @@ impl ApiImpl for SimulationApi { } fn after_election(&self, wp: &mut 
walproposer::bindings::WalProposer) { - let prop_lsn = wp.propEpochStartLsn; + let prop_lsn = wp.propTermStartLsn; let prop_term = wp.propTerm; let mut prev_lsn: u64 = 0; @@ -612,7 +611,7 @@ impl ApiImpl for SimulationApi { sk: &mut walproposer::bindings::Safekeeper, ) -> bool { let mut startpos = wp.truncateLsn; - let endpos = wp.propEpochStartLsn; + let endpos = wp.propTermStartLsn; if startpos == endpos { debug!("recovery_download: nothing to download"); From 7588983168dbc2da7e025684180012e036f9b1b7 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 11 Mar 2025 10:33:35 -0400 Subject: [PATCH 145/207] fix(scrubber): log even if no refs are found (#11160) ## Problem Investigate https://github.com/neondatabase/neon/issues/11159 ## Summary of changes This doesn't fix the issue, but at least we can narrow down the cause next time it happens by logging ancestor referenced layer cnt even if it's 0. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 3 ++- storage_scrubber/src/pageserver_physical_gc.rs | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 42b36f7252..123079804b 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1091,7 +1091,7 @@ impl Timeline { let latest_gc_cutoff = self.get_applied_gc_cutoff_lsn(); tracing::info!( - "latest_gc_cutoff: {}, pitr cutoff {}", + "starting shard ancestor compaction, latest_gc_cutoff: {}, pitr cutoff {}", *latest_gc_cutoff, self.gc_info.read().unwrap().cutoffs.time ); @@ -1120,6 +1120,7 @@ impl Timeline { // Expensive, exhaustive check of keys in this layer: this guards against ShardedRange's calculations being // wrong. 
If ShardedRange claims the local page count is zero, then no keys in this layer // should be !is_key_disposable() + // TODO: exclude sparse keyspace from this check, otherwise it will infinitely loop. let range = layer_desc.get_key_range(); let mut key = range.start; while key < range.end { diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index c956b1abbc..f14341c7bc 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -152,10 +152,8 @@ impl TenantRefAccumulator { } } - if !ancestor_refs.is_empty() { - tracing::info!(%ttid, "Found {} ancestor refs", ancestor_refs.len()); - self.ancestor_ref_shards.update(ttid, ancestor_refs); - } + tracing::info!(%ttid, "Found {} ancestor refs", ancestor_refs.len()); + self.ancestor_ref_shards.update(ttid, ancestor_refs); } /// Consume Self and return a vector of ancestor tenant shards that should be GC'd, and map of referenced ancestor layers to preserve @@ -779,7 +777,7 @@ pub async fn pageserver_physical_gc( let mut summary = GcSummary::default(); { - let timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY)); + let timelines = timelines.try_buffered(CONCURRENCY); let timelines = timelines.try_flatten(); let timelines = timelines.map_ok(|(ttid, tenant_manifest_arc)| { @@ -793,8 +791,8 @@ pub async fn pageserver_physical_gc( tenant_manifest_arc, ) }); - let mut timelines = std::pin::pin!(timelines.try_buffered(CONCURRENCY)); - + let timelines = timelines.try_buffered(CONCURRENCY); + let mut timelines = std::pin::pin!(timelines); // Drain futures for per-shard GC, populating accumulator as a side effect while let Some(i) = timelines.next().await { summary.merge(i?); From 011f7c21a3151c2f88232e718b8b2f701ec94517 Mon Sep 17 00:00:00 2001 From: Ivan Efremov Date: Tue, 11 Mar 2025 19:17:30 +0200 Subject: [PATCH 146/207] fix(proxy): Add testodrome query id HTTP header (#11167) Handle "X-Neon-Query-ID" 
header to glue data with testodrome queries. Relates to the #22486 --- proxy/src/compute.rs | 2 +- proxy/src/context/mod.rs | 10 ++++++++-- proxy/src/serverless/backend.rs | 9 +++++++++ proxy/src/serverless/mod.rs | 9 +++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/proxy/src/compute.rs b/proxy/src/compute.rs index 3852bfe348..26254beecf 100644 --- a/proxy/src/compute.rs +++ b/proxy/src/compute.rs @@ -290,7 +290,7 @@ impl ConnCfg { "connected to compute node at {host} ({socket_addr}) sslmode={:?}, latency={}, query_id={}", self.0.get_ssl_mode(), ctx.get_proxy_latency(), - ctx.get_testodrome_id(), + ctx.get_testodrome_id().unwrap_or_default(), ); // NB: CancelToken is supposed to hold socket_addr, but we use connect_raw. diff --git a/proxy/src/context/mod.rs b/proxy/src/context/mod.rs index 4f72a86f30..7c1a6206c1 100644 --- a/proxy/src/context/mod.rs +++ b/proxy/src/context/mod.rs @@ -272,6 +272,13 @@ impl RequestContext { .set_user_agent(user_agent); } + pub(crate) fn set_testodrome_id(&self, query_id: String) { + self.0 + .try_lock() + .expect("should not deadlock") + .set_testodrome_id(query_id); + } + pub(crate) fn set_auth_method(&self, auth_method: AuthMethod) { let mut this = self.0.try_lock().expect("should not deadlock"); this.auth_method = Some(auth_method); @@ -371,13 +378,12 @@ impl RequestContext { .accumulated() } - pub(crate) fn get_testodrome_id(&self) -> String { + pub(crate) fn get_testodrome_id(&self) -> Option { self.0 .try_lock() .expect("should not deadlock") .testodrome_query_id .clone() - .unwrap_or_default() } pub(crate) fn success(&self) { diff --git a/proxy/src/serverless/backend.rs b/proxy/src/serverless/backend.rs index b55661cec8..e40aa024a8 100644 --- a/proxy/src/serverless/backend.rs +++ b/proxy/src/serverless/backend.rs @@ -571,6 +571,11 @@ impl ConnectMechanism for TokioMechanism { "compute_id", tracing::field::display(&node_info.aux.compute_id), ); + + if let Some(query_id) = ctx.get_testodrome_id() { + 
info!("latency={}, query_id={}", ctx.get_proxy_latency(), query_id); + } + Ok(poll_client( self.pool.clone(), ctx, @@ -628,6 +633,10 @@ impl ConnectMechanism for HyperMechanism { tracing::field::display(&node_info.aux.compute_id), ); + if let Some(query_id) = ctx.get_testodrome_id() { + info!("latency={}, query_id={}", ctx.get_proxy_latency(), query_id); + } + Ok(poll_http2_client( self.pool.clone(), ctx, diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index acd6a05718..a7f46cbe58 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -446,6 +446,15 @@ async fn request_handler( .map(Into::into), ); + let testodrome_id = request + .headers() + .get("X-Neon-Query-ID") + .map(|value| value.to_str().unwrap_or_default().to_string()); + + if let Some(query_id) = testodrome_id { + ctx.set_testodrome_id(query_id); + } + let span = ctx.span(); info!(parent: &span, "performing websocket upgrade"); From 8983677f291bd082b146f4bd649c847a65061fb6 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Tue, 11 Mar 2025 23:09:32 +0400 Subject: [PATCH 147/207] Ignore cargo deny advisory RUSTSEC-2025-0014 for humantime (#11180) ## Problem `humantime` is not maintained and `cargo deny check` fails - Will be addressed in https://github.com/neondatabase/neon/issues/11179 ## Summary of changes Ignore RUSTSEC-2025-0014 advisory for now --- deny.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deny.toml b/deny.toml index ed7aa9ef9f..1023b1833a 100644 --- a/deny.toml +++ b/deny.toml @@ -31,6 +31,10 @@ reason = "the marvin attack only affects private key decryption, not public key id = "RUSTSEC-2024-0436" reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact." +[[advisories.ignore]] +id = "RUSTSEC-2025-0014" +reason = "The humantime is widely used and is not easy to replace right now. 
It is unmaintained, but it has no known vulnerabilities to care about. #11179" + # This section is considered when running `cargo deny check licenses` # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html From 7d221214bbb9943710ce4f9b5c86d7bc941cf978 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Tue, 11 Mar 2025 15:13:52 -0400 Subject: [PATCH 148/207] feat(pageserver): support no-yield for gc-compaction (#11184) ## Problem This should also resolve the test flakiness of `test_gc_feedback`. close https://github.com/neondatabase/neon/issues/11144 ## Summary of changes If `NoYield` is set, do not yield in gc-compaction. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 55 +++++++++++++------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 123079804b..e6f2104e90 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -393,6 +393,9 @@ impl GcCompactionQueue { if job.dry_run { flags |= CompactFlags::DryRun; } + if options.flags.contains(CompactFlags::NoYield) { + flags |= CompactFlags::NoYield; + } let options = CompactOptions { flags, sub_compaction: false, @@ -2617,6 +2620,7 @@ impl Timeline { ) -> Result { let sub_compaction = options.sub_compaction; let job = GcCompactJob::from_compact_options(options.clone()); + let no_yield = options.flags.contains(CompactFlags::NoYield); if sub_compaction { info!( "running enhanced gc bottom-most compaction with sub-compaction, splitting compaction jobs" @@ -2631,14 +2635,15 @@ impl Timeline { idx + 1, jobs_len ); - self.compact_with_gc_inner(cancel, job, ctx).await?; + self.compact_with_gc_inner(cancel, job, ctx, no_yield) + .await?; } if jobs_len == 0 { info!("no jobs to run, skipping gc bottom-most 
compaction"); } return Ok(CompactionOutcome::Done); } - self.compact_with_gc_inner(cancel, job, ctx).await + self.compact_with_gc_inner(cancel, job, ctx, no_yield).await } async fn compact_with_gc_inner( @@ -2646,6 +2651,7 @@ impl Timeline { cancel: &CancellationToken, job: GcCompactJob, ctx: &RequestContext, + no_yield: bool, ) -> Result { // Block other compaction/GC tasks from running for now. GC-compaction could run along // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. @@ -2915,14 +2921,18 @@ impl Timeline { if cancel.is_cancelled() { return Err(CompactionError::ShuttingDown); } - let should_yield = self - .l0_compaction_trigger - .notified() - .now_or_never() - .is_some(); - if should_yield { - tracing::info!("preempt gc-compaction when downloading layers: too many L0 layers"); - return Ok(CompactionOutcome::YieldForL0); + if !no_yield { + let should_yield = self + .l0_compaction_trigger + .notified() + .now_or_never() + .is_some(); + if should_yield { + tracing::info!( + "preempt gc-compaction when downloading layers: too many L0 layers" + ); + return Ok(CompactionOutcome::YieldForL0); + } } let resident_layer = layer .download_and_keep_resident(ctx) @@ -3055,16 +3065,21 @@ impl Timeline { if cancel.is_cancelled() { return Err(CompactionError::ShuttingDown); } - keys_processed += 1; - if keys_processed % 1000 == 0 { - let should_yield = self - .l0_compaction_trigger - .notified() - .now_or_never() - .is_some(); - if should_yield { - tracing::info!("preempt gc-compaction in the main loop: too many L0 layers"); - return Ok(CompactionOutcome::YieldForL0); + + if !no_yield { + keys_processed += 1; + if keys_processed % 1000 == 0 { + let should_yield = self + .l0_compaction_trigger + .notified() + .now_or_never() + .is_some(); + if should_yield { + tracing::info!( + "preempt gc-compaction in the main loop: too many L0 layers" + ); + return Ok(CompactionOutcome::YieldForL0); + } } } if 
self.shard_identity.is_key_disposable(&key) { From 083a30b1e25b28a61009658d4bcdc2d6cb7e2db4 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 11 Mar 2025 20:45:06 +0100 Subject: [PATCH 149/207] storage broker: disable deploy by default (#11172) context - https://github.com/neondatabase/cloud/issues/23486#issuecomment-2711587222 - companion infra.git PR: https://github.com/neondatabase/infra/pull/3249 --- .github/workflows/build_and_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 197b83fac4..1c0971a49d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1175,7 +1175,7 @@ jobs: -f deployPgSniRouter=false \ -f deployProxy=false \ -f deployStorage=true \ - -f deployStorageBroker=true \ + -f deployStorageBroker=false \ -f deployStorageController=true \ -f branch=main \ -f dockerTag=${{needs.meta.outputs.build-tag}} \ @@ -1183,7 +1183,7 @@ jobs: gh workflow --repo neondatabase/infra run deploy-prod.yml --ref main \ -f deployStorage=true \ - -f deployStorageBroker=true \ + -f deployStorageBroker=false \ -f deployStorageController=true \ -f branch=main \ -f dockerTag=${{needs.meta.outputs.build-tag}} From 158db414bf881fb358494e3215d192c8fa420a53 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 11 Mar 2025 21:40:23 +0100 Subject: [PATCH 150/207] buffered writer: handle write errors by retrying all write IO errors indefinitely (#10993) # Problem If the Pageserver ingest path (InMemoryLayer=>EphemeralFile=>BufferedWriter) encounters ENOSPC or any other write IO error when flushing the mutable buffer of the BufferedWriter, the buffered writer is left in a state where subsequent _reads_ from the InMemoryLayer it will cause a `must not use after we returned an error` panic. The reason is that 1. the flush background task bails on flush failure, 2. 
causing the `FlushHandle::flush` function to fail at channel.recv() and 3. causing the `FlushHandle::flush` function to bail with the flush error, 4. leaving its caller `BufferedWriter::flush` with `BufferedWriter::mutable = None`, 5. once the InMemoryLayer's RwLock::write guard is dropped, subsequent reads can enter, 6. those reads find `mutable = None` and cause the panic. # Context It has always been the contract that writes against the BufferedWriter API must not be retried because the writer/stream-style/append-only interface makes no atomicity guarantees ("On error, did nothing or a piece of the buffer get appended?"). The idea was that the error would bubble up to upper layers that can throw away the buffered writer and create a new one. (See our [internal error handling policy document on how to handle e.g. `ENOSPC`](https://github.com/neondatabase/docs/blob/c870a50bc099d82444947a353fb302c761949c94/src/storage/handling_io_and_logical_errors.md#L36-L43)). That _might_ be true for delta/image layer writers, I haven't checked. But it's certainly not true for the ingest path: there are no provisions to throw away an InMemoryLayer that encountered a write error and reingest the WAL already written to it. Adding such higher-level retries would involve resetting last_record_lsn to a lower value and restarting walreceiver. The code isn't flexible enough to do that, and such complexity likely isn't worth it given that write errors are rare. # Solution The solution in this PR is to retry _any_ failing write operation _indefinitely_ inside the buffered writer flush task, except of course those that are fatal as per `maybe_fatal_err`. Retrying indefinitely ensures that `BufferedWriter::mutable` is never left `None` in the case of IO errors, thereby solving the problem described above. It's a clear improvement over the status quo. However, while we're retrying, we build up backpressure because the `flush` is only double-buffered, not infinitely buffered.
Backpressure here is generally good to avoid resource exhaustion, **but blocks reads** and hence stalls GetPage requests because InMemoryLayer reads and writes are mutually exclusive. That's orthogonal to the problem that is solved here, though. ## Caveats Note that there are some remaining conditions in the flush background task where it can bail with an error. I have annotated one of them with a TODO comment. Hence the `FlushHandle::flush` is still fallible and hence the overall scenario of leaving `mutable = None` on the bail path is still possible. We can clean that up in a later commit. Note also that retrying indefinitely is great for temporary errors like ENOSPC but likely undesirable in case the `std::io::Error` we get is really due to higher-level logic bugs. For example, we could fail to flush because the timeline or tenant directory got deleted and VirtualFile's reopen fails with ENOENT. Note finally that cancellation is not respected while we're retrying. This means we will block timeline/tenant/pageserver shutdown. The reason is that the existing cancellation story for the buffered writer background task was to recv from flush op channel until the sending side (FlushHandle) is explicitly shut down or dropped. Failing to handle cancellation carries the operational risk that even if a single timeline gets stuck because of a logic bug such as the one laid out above, we must still restart the whole pageserver process. # Alternatives Considered As pointed out in the `Context` section, throwing away a InMemoryLayer that encountered an error and reingesting the WAL is a lot of complexity that IMO isn't justified for such an edge case. Also, it's wasteful. I think it's a local optimum. A more general and simpler solution for ENOSPC is to `abort()` the process and run eviction on startup before bringing up the rest of pageserver. 
I argued for it in the past, the pro arguments are still valid and complete: https://neondb.slack.com/archives/C033RQ5SPDH/p1716896265296329 The trouble at the time was implementing eviction on startup. However, maybe things are simpler now that we are fully storcon-managed and all tenants have secondaries. For example, if pageserver `abort()`s on ENOSPC and then simply doesn't respond to storcon heartbeats while we're running eviction on startup, storcon will fail tenants over to the secondary anyway, giving us all the time we need to clean up. The downside is that if there's a systemic space management bug, the above proposal will just propagate the problem to other nodes. But I imagine that because of the delays involved with filling up disks, the system might reach a half-stable state, providing operators more time to react. # Demo Intermediary commit `a03f335121480afc0171b0f34606bdf929e962c5` is demoed in this (internal) screen recording: https://drive.google.com/file/d/1nBC6lFV2himQ8vRXDXrY30yfWmI2JL5J/view?usp=drive_link # Perf Testing Ran `bench_ingest` on tmpfs, no measurable difference. Spans are uniquely owned by the flush task, and the span stack isn't too deep, so, enter and exit should be cheap. Plus, each flush takes ~150us with direct IO enabled, so, not _that_ high-frequency an event anyway.
# Refs - fixes https://github.com/neondatabase/neon/issues/10856 --- pageserver/src/tenant/ephemeral_file.rs | 3 +- .../tenant/remote_timeline_client/download.rs | 3 +- pageserver/src/virtual_file.rs | 5 +- .../virtual_file/owned_buffers_io/write.rs | 9 ++- .../owned_buffers_io/write/flush.rs | 65 ++++++++++++++++--- 5 files changed, 68 insertions(+), 17 deletions(-) diff --git a/pageserver/src/tenant/ephemeral_file.rs b/pageserver/src/tenant/ephemeral_file.rs index f048a355a8..396d930f77 100644 --- a/pageserver/src/tenant/ephemeral_file.rs +++ b/pageserver/src/tenant/ephemeral_file.rs @@ -9,7 +9,7 @@ use camino::Utf8PathBuf; use num_traits::Num; use pageserver_api::shard::TenantShardId; use tokio_epoll_uring::{BoundedBuf, Slice}; -use tracing::error; +use tracing::{error, info_span}; use utils::id::TimelineId; use crate::assert_u64_eq_usize::{U64IsUsize, UsizeIsU64}; @@ -76,6 +76,7 @@ impl EphemeralFile { || IoBufferMut::with_capacity(TAIL_SZ), gate.enter()?, ctx, + info_span!(parent: None, "ephemeral_file_buffered_writer", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), timeline_id=%timeline_id, path = %filename), ), _gate_guard: gate.enter()?, }) diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 92be2145ce..954ff0c1d6 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -18,7 +18,7 @@ use tokio::fs::{self, File, OpenOptions}; use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; -use tracing::warn; +use tracing::{info_span, warn}; use utils::crashsafe::path_with_suffix_extension; use utils::id::{TenantId, TimelineId}; use utils::{backoff, pausable_failpoint}; @@ -229,6 +229,7 @@ async fn download_object( || IoBufferMut::with_capacity(super::BUFFER_SIZE), gate.enter().map_err(|_| 
DownloadError::Cancelled)?, ctx, + info_span!(parent: None, "download_object_buffered_writer", %dst_path), ); // TODO: use vectored write (writev) once supported by tokio-epoll-uring. diff --git a/pageserver/src/virtual_file.rs b/pageserver/src/virtual_file.rs index 1da3130df0..cd3d897423 100644 --- a/pageserver/src/virtual_file.rs +++ b/pageserver/src/virtual_file.rs @@ -1299,9 +1299,8 @@ impl OwnedAsyncWriter for VirtualFile { buf: FullSlice, offset: u64, ctx: &RequestContext, - ) -> std::io::Result> { - let (buf, res) = VirtualFile::write_all_at(self, buf, offset, ctx).await; - res.map(|_| buf) + ) -> (FullSlice, std::io::Result<()>) { + VirtualFile::write_all_at(self, buf, offset, ctx).await } } diff --git a/pageserver/src/virtual_file/owned_buffers_io/write.rs b/pageserver/src/virtual_file/owned_buffers_io/write.rs index 861ca3aa2a..a7e06c0a14 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write.rs @@ -31,7 +31,7 @@ pub trait OwnedAsyncWriter { buf: FullSlice, offset: u64, ctx: &RequestContext, - ) -> impl std::future::Future>> + Send; + ) -> impl std::future::Future, std::io::Result<()>)> + Send; } /// A wrapper aorund an [`OwnedAsyncWriter`] that uses a [`Buffer`] to batch @@ -66,6 +66,7 @@ where buf_new: impl Fn() -> B, gate_guard: utils::sync::gate::GateGuard, ctx: &RequestContext, + flush_task_span: tracing::Span, ) -> Self { Self { writer: writer.clone(), @@ -75,6 +76,7 @@ where buf_new(), gate_guard, ctx.attached_child(), + flush_task_span, ), bytes_submitted: 0, } @@ -269,12 +271,12 @@ mod tests { buf: FullSlice, offset: u64, _: &RequestContext, - ) -> std::io::Result> { + ) -> (FullSlice, std::io::Result<()>) { self.writes .lock() .unwrap() .push((Vec::from(&buf[..]), offset)); - Ok(buf) + (buf, Ok(())) } } @@ -293,6 +295,7 @@ mod tests { || IoBufferMut::with_capacity(2), gate.enter()?, ctx, + tracing::Span::none(), ); writer.write_buffered_borrowed(b"abc", ctx).await?; diff --git 
a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs index 46309d4011..e3cf9be438 100644 --- a/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs +++ b/pageserver/src/virtual_file/owned_buffers_io/write/flush.rs @@ -1,9 +1,14 @@ +use std::ops::ControlFlow; use std::sync::Arc; +use once_cell::sync::Lazy; +use tokio_util::sync::CancellationToken; +use tracing::{Instrument, info, info_span, warn}; use utils::sync::duplex; use super::{Buffer, CheapCloneForRead, OwnedAsyncWriter}; use crate::context::RequestContext; +use crate::virtual_file::MaybeFatalIo; use crate::virtual_file::owned_buffers_io::io_buf_aligned::IoBufAligned; use crate::virtual_file::owned_buffers_io::io_buf_ext::FullSlice; @@ -118,6 +123,7 @@ where buf: B, gate_guard: utils::sync::gate::GateGuard, ctx: RequestContext, + span: tracing::Span, ) -> Self where B: Buffer + Send + 'static, @@ -125,11 +131,14 @@ where // It is fine to buffer up to only 1 message. We only 1 message in-flight at a time. let (front, back) = duplex::mpsc::channel(1); - let join_handle = tokio::spawn(async move { - FlushBackgroundTask::new(back, file, gate_guard, ctx) - .run(buf.flush()) - .await - }); + let join_handle = tokio::spawn( + async move { + FlushBackgroundTask::new(back, file, gate_guard, ctx) + .run(buf.flush()) + .await + } + .instrument(span), + ); FlushHandle { inner: Some(FlushHandleInner { @@ -236,6 +245,7 @@ where /// The passed in slice is immediately sent back to the flush handle through the duplex channel. async fn run(mut self, slice: FullSlice) -> std::io::Result> { // Sends the extra buffer back to the handle. + // TODO: can this ever await and or fail? I think not. self.channel.send(slice).await.map_err(|_| { std::io::Error::new(std::io::ErrorKind::BrokenPipe, "flush handle closed early") })?; @@ -251,10 +261,47 @@ where } // Write slice to disk at `offset`. 
- let slice = self - .writer - .write_all_at(request.slice, request.offset, &self.ctx) - .await?; + // + // Error handling happens according to the current policy of crashing + // on fatal IO errors and retrying in place otherwise (deeming all other errors retryable). + // (The upper layers of the Pageserver write path are not equipped to retry write errors + // becasuse they often deallocate the buffers that were already written). + // + // TODO: cancellation sensitiity. + // Without it, if we hit a bug where retrying is never successful, + // then we can't shut down the timeline/tenant/pageserver cleanly because + // layers of the Pageserver write path are holding the gate open for EphemeralFile. + // + // TODO: use utils::backoff::retry once async closures are actually usable + // + let mut slice_storage = Some(request.slice); + for attempt in 1.. { + let result = async { + if attempt > 1 { + info!("retrying flush"); + } + let slice = slice_storage.take().expect( + "likely previous invocation of this future didn't get polled to completion", + ); + let (slice, res) = self.writer.write_all_at(slice, request.offset, &self.ctx).await; + slice_storage = Some(slice); + let res = res.maybe_fatal_err("owned_buffers_io flush"); + let Err(err) = res else { + return ControlFlow::Break(()); + }; + warn!(%err, "error flushing buffered writer buffer to disk, retrying after backoff"); + static NO_CANCELLATION: Lazy = Lazy::new(CancellationToken::new); + utils::backoff::exponential_backoff(attempt, 1.0, 10.0, &NO_CANCELLATION).await; + ControlFlow::Continue(()) + } + .instrument(info_span!("flush_attempt", %attempt)) + .await; + match result { + ControlFlow::Break(()) => break, + ControlFlow::Continue(()) => continue, + } + } + let slice = slice_storage.expect("loop must have run at least once"); #[cfg(test)] { From decd265c99a160d5d5c37332ef23c2c0898b40f9 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Tue, 11 Mar 2025 17:18:09 -0500 Subject: [PATCH 151/207] Revert notify 
to 6.0.0 (#11162) The upgrade to 8.0.0 caused severe performance regressions in the start_postgres_ms metric, which measures the time it takes from execing Postgres to the time Postgres marks itself as ready in the postmaster.pid file. We use the notify crate to watch for changes in the pgdata directory and the postmaster.pid file. Signed-off-by: Tristan Partin --- Cargo.lock | 42 +++++++++++++++++++++------------------ Cargo.toml | 4 +++- workspace_hack/Cargo.toml | 6 ++---- 3 files changed, 28 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 778ff19fec..d023d340d4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3254,11 +3254,11 @@ dependencies = [ [[package]] name = "inotify" -version = "0.11.0" +version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" +checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff" dependencies = [ - "bitflags 2.8.0", + "bitflags 1.3.2", "inotify-sys", "libc", ] @@ -3732,6 +3732,18 @@ dependencies = [ "adler2", ] +[[package]] +name = "mio" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c" +dependencies = [ + "libc", + "log", + "wasi 0.11.0+wasi-snapshot-preview1", + "windows-sys 0.48.0", +] + [[package]] name = "mio" version = "1.0.3" @@ -3739,7 +3751,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ "libc", - "log", "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys 0.52.0", ] @@ -3817,29 +3828,23 @@ checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" [[package]] name = "notify" -version = "8.0.0" +version = "6.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943" +checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d" dependencies = [ "bitflags 2.8.0", + "crossbeam-channel", "filetime", "fsevent-sys", "inotify", "kqueue", "libc", "log", - "mio", - "notify-types", + "mio 0.8.11", "walkdir", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] -[[package]] -name = "notify-types" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e0826a989adedc2a244799e823aece04662b66609d96af8dff7ac6df9a8925d" - [[package]] name = "ntapi" version = "0.4.1" @@ -4980,7 +4985,7 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck", - "itertools 0.10.5", + "itertools 0.12.1", "log", "multimap", "once_cell", @@ -5021,7 +5026,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.10.5", + "itertools 0.12.1", "proc-macro2", "quote", "syn 2.0.90", @@ -7123,7 +7128,7 @@ dependencies = [ "backtrace", "bytes", "libc", - "mio", + "mio 1.0.3", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", @@ -8387,7 +8392,6 @@ dependencies = [ "hyper-util", "indexmap 1.9.3", "indexmap 2.0.1", - "itertools 0.10.5", "itertools 0.12.1", "lazy_static", "libc", diff --git a/Cargo.toml b/Cargo.toml index c59c4c5435..4a32b6d95d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,7 +126,9 @@ measured = { version = "0.0.22", features=["lasso"] } measured-process = { version = "0.0.22" } memoffset = "0.9" nix = { version = "0.27", features = ["dir", "fs", "process", "socket", "signal", "poll"] } -notify = "8.0.0" +# Do not update to >= 7.0.0, at least. The update will have a significant impact +# on compute startup metrics (start_postgres_ms), >= 25% degradation. 
+notify = "6.0.0" num_cpus = "1.15" num-traits = "0.2.15" once_cell = "1.13" diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 4a6ab6e745..183cc66ab9 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -47,8 +47,7 @@ hyper-dff4ba8e3ae991db = { package = "hyper", version = "1", features = ["full"] hyper-util = { version = "0.1", features = ["client-legacy", "http1", "http2", "server", "service"] } indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" } +itertools = { version = "0.12" } lazy_static = { version = "1", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } @@ -115,8 +114,7 @@ half = { version = "2", default-features = false, features = ["num-traits"] } hashbrown = { version = "0.14", features = ["raw"] } indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1", default-features = false, features = ["std"] } indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2", features = ["serde"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10" } +itertools = { version = "0.12" } libc = { version = "0.2", features = ["extra_traits", "use_std"] } log = { version = "0.4", default-features = false, features = ["std"] } memchr = { version = "2" } From e8396034acca009716a8431b23aa924efaa057a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Tue, 11 Mar 2025 23:59:30 +0100 Subject: [PATCH 152/207] fix(ci): fail meta using jq halt_error if data is unexpectedly 
missing (#11151) ## Problem When the GitHub API is having problems, we might not get data back, and are happily setting vars as empty. This causes problems down the line. See https://github.com/neondatabase/neon/actions/runs/13718859397/job/38381946590?pr=11132#step:5:1 for example. ## Summary of changes Fail the `meta` job if we don't get expected data back from GitHub. --- .github/scripts/previous-releases.jq | 6 ++++++ .github/workflows/_meta.yml | 10 +++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/.github/scripts/previous-releases.jq b/.github/scripts/previous-releases.jq index b0b00bce18..51204da099 100644 --- a/.github/scripts/previous-releases.jq +++ b/.github/scripts/previous-releases.jq @@ -17,6 +17,12 @@ ({}; .[$entry.component] |= (if . == null or $entry.version > .version then $entry else . end)) +# Ensure that each component exists, or fail +| (["storage", "compute", "proxy"] - (keys)) as $missing +| if ($missing | length) > 0 then + "Error: Found no release for \($missing | join(", "))!\n" | halt_error(1) + else . 
end + # Convert the resulting object into an array of formatted strings | to_entries | map("\(.key)=\(.value.full)") diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index a3fc125648..cae7fae6a4 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -24,6 +24,10 @@ on: permissions: {} +defaults: + run: + shell: bash -euo pipefail {0} + jobs: tags: runs-on: ubuntu-22.04 @@ -83,7 +87,11 @@ jobs: echo "tag=release-compute-$(git rev-list --count HEAD)" | tee -a $GITHUB_OUTPUT ;; pr|storage-rc-pr|compute-rc-pr|proxy-rc-pr) - BUILD_AND_TEST_RUN_ID=$(gh run list -b $CURRENT_BRANCH -c $CURRENT_SHA -w 'Build and Test' -L 1 --json databaseId --jq '.[].databaseId') + BUILD_AND_TEST_RUN_ID=$(gh api --paginate \ + -H "Accept: application/vnd.github+json" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=${CURRENT_SHA}&branch=${CURRENT_BRANCH}" \ + | jq '[.workflow_runs[] | select(.name == "Build and Test")][0].id // ("Error: No matching workflow run found." | halt_error(1))') echo "tag=$BUILD_AND_TEST_RUN_ID" | tee -a $GITHUB_OUTPUT ;; workflow-dispatch) From da2431f11fcf71fbe905f6bc129b6f7902f3d44e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 12 Mar 2025 03:30:56 +0100 Subject: [PATCH 153/207] storcon: add --control-plane-url config option (#11173) Adds the `--control-plane-url` config option to the storcon, which we want to migrate to instead of using `notify-attach`. 
Part of #11163 --- docs/storage_controller.md | 2 +- storage_controller/src/compute_hook.rs | 11 ++++++++++- storage_controller/src/main.rs | 13 ++++++++++--- storage_controller/src/service.rs | 9 +++++++++ 4 files changed, 30 insertions(+), 5 deletions(-) diff --git a/docs/storage_controller.md b/docs/storage_controller.md index 6d2ef929a4..cf00cd8e33 100644 --- a/docs/storage_controller.md +++ b/docs/storage_controller.md @@ -101,7 +101,7 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver location changes. -The hook is configured using the storage controller's `--compute-hook-url` CLI option. If the hook requires +The hook is configured using the storage controller's `--control-plane-url` CLI option. If the hook requires JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request. In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. 
In `neon_local` systems diff --git a/storage_controller/src/compute_hook.rs b/storage_controller/src/compute_hook.rs index b602af362d..5ce4d63d77 100644 --- a/storage_controller/src/compute_hook.rs +++ b/storage_controller/src/compute_hook.rs @@ -624,7 +624,16 @@ impl ComputeHook { MaybeSendResult::Transmit((request, lock)) => (request, lock), }; - let result = if let Some(notify_url) = &self.config.compute_hook_url { + let compute_hook_url = if let Some(control_plane_url) = &self.config.control_plane_url { + Some(if control_plane_url.ends_with('/') { + format!("{control_plane_url}notify-attach") + } else { + format!("{control_plane_url}/notify-attach") + }) + } else { + self.config.compute_hook_url.clone() + }; + let result = if let Some(notify_url) = &compute_hook_url { self.do_notify(notify_url, &request, cancel).await } else { self.do_notify_local(&request).await.map_err(|e| { diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index 46ac1cd7ca..6e3c70c42b 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -71,6 +71,10 @@ struct Cli { #[arg(long)] compute_hook_url: Option, + /// URL to control plane storage API prefix + #[arg(long)] + control_plane_url: Option, + /// URL to connect to postgres, like postgresql://localhost:1234/storage_controller #[arg(long)] database_url: Option, @@ -313,11 +317,13 @@ async fn async_main() -> anyhow::Result<()> { "Insecure config! One or more secrets is not set. This is only permitted in `--dev` mode" ); } - StrictMode::Strict if args.compute_hook_url.is_none() => { - // Production systems should always have a compute hook set, to prevent falling + StrictMode::Strict + if args.compute_hook_url.is_none() && args.control_plane_url.is_none() => + { + // Production systems should always have a control plane URL set, to prevent falling // back to trying to use neon_local. 
anyhow::bail!( - "`--compute-hook-url` is not set: this is only permitted in `--dev` mode" + "neither `--compute-hook-url` nor `--control-plane-url` are set: this is only permitted in `--dev` mode" ); } StrictMode::Strict => { @@ -343,6 +349,7 @@ async fn async_main() -> anyhow::Result<()> { control_plane_jwt_token: secrets.control_plane_jwt_token, peer_jwt_token: secrets.peer_jwt_token, compute_hook_url: args.compute_hook_url, + control_plane_url: args.control_plane_url, max_offline_interval: args .max_offline_interval .map(humantime::Duration::into) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index a06748abc6..96b67fa81e 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -363,6 +363,15 @@ pub struct Config { /// assume it is running in a test environment and try to update neon_local. pub compute_hook_url: Option, + /// Prefix for storage API endpoints of the control plane. We use this prefix to compute + /// URLs that we use to send pageserver and safekeeper attachment locations. + /// If this is None, the compute hook will assume it is running in a test environment + /// and try to invoke neon_local instead. + /// + /// For now, there is also `compute_hook_url` which allows configuration of the pageserver + /// specific endpoint, but it is in the process of being phased out. + pub control_plane_url: Option, + /// Grace period within which a pageserver does not respond to heartbeats, but is still /// considered active. Once the grace period elapses, the next heartbeat failure will /// mark the pagseserver offline. 
From f60ffe30214ddb7042b5bd2febab663d4d36f6e3 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Wed, 12 Mar 2025 07:52:18 +0200 Subject: [PATCH 154/207] Rebase compare local debug mode (#11174) ## Problem DEBUG_COMPARE_LOCAL mode is broken See https://neondb.slack.com/archives/C03QLRH7PPD/p1732862608323269?thread_ts=1732711054.862919&cid=C03QLRH7PPD ## Summary of changes Fix compile errors and unlogged build issues. Co-authored-by: Konstantin Knizhnik --- pgxn/neon/pagestore_smgr.c | 105 +++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 0414661a5f..1135212e22 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -76,6 +76,10 @@ #include "access/xlogrecovery.h" #endif +#if PG_VERSION_NUM < 160000 +typedef PGAlignedBlock PGIOAlignedBlock; +#endif + /* * If DEBUG_COMPARE_LOCAL is defined, we pass through all the SMGR API * calls to md.c, and *also* do the calls to the Page Server. 
On every @@ -1803,7 +1807,7 @@ static XLogRecPtr log_newpage_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno, Page page, bool page_std) { - PGAlignedBlock copied_buffer; + PGIOAlignedBlock copied_buffer; memcpy(copied_buffer.data, page, BLCKSZ); return log_newpage(rinfo, forkNum, blkno, copied_buffer.data, page_std); @@ -1820,7 +1824,7 @@ static XLogRecPtr log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno, BlockNumber nblocks, Page *pages, bool page_std) { - PGAlignedBlock copied_buffer[XLR_MAX_BLOCK_ID]; + PGIOAlignedBlock copied_buffer[XLR_MAX_BLOCK_ID]; BlockNumber blknos[XLR_MAX_BLOCK_ID]; Page pageptrs[XLR_MAX_BLOCK_ID]; int nregistered = 0; @@ -1858,7 +1862,7 @@ log_newpages_copy(NRelFileInfo * rinfo, ForkNumber forkNum, BlockNumber blkno, static bool PageIsEmptyHeapPage(char *buffer) { - PGAlignedBlock empty_page; + PGIOAlignedBlock empty_page; PageInit((Page) empty_page.data, BLCKSZ, 0); @@ -2847,7 +2851,7 @@ static void neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum, int nblocks, bool skipFsync) { - const PGAlignedBlock buffer = {0}; + const PGIOAlignedBlock buffer = {0}; int remblocks = nblocks; XLogRecPtr lsn = 0; @@ -3389,15 +3393,16 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void *buffer if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) { char pageserver_masked[BLCKSZ]; - char mdbuf[BLCKSZ]; - char mdbuf_masked[BLCKSZ]; + PGIOAlignedBlock mdbuf; + PGIOAlignedBlock mdbuf_masked; + XLogRecPtr request_lsn = request_lsns.request_lsn; - mdread(reln, forkNum, blkno, mdbuf); + mdread(reln, forkNum, blkno, mdbuf.data); memcpy(pageserver_masked, buffer, BLCKSZ); - memcpy(mdbuf_masked, mdbuf, BLCKSZ); + memcpy(mdbuf_masked.data, mdbuf.data, BLCKSZ); - if (PageIsNew((Page) mdbuf)) + if (PageIsNew((Page) mdbuf.data)) { if (!PageIsNew((Page) pageserver_masked)) { @@ -3416,41 +3421,41 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno, void 
*buffer RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf)); + hexdump_page(mdbuf.data)); } - else if (PageGetSpecialSize(mdbuf) == 0) + else if (PageGetSpecialSize(mdbuf.data) == 0) { /* assume heap */ - RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno); + RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked.data, blkno); RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno); - if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0) + if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0) { neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf_masked), + hexdump_page(mdbuf_masked.data), hexdump_page(pageserver_masked)); } } - else if (PageGetSpecialSize(mdbuf) == MAXALIGN(sizeof(BTPageOpaqueData))) + else if (PageGetSpecialSize(mdbuf.data) == MAXALIGN(sizeof(BTPageOpaqueData))) { - if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf))->btpo_cycleid < MAX_BT_CYCLE_ID) + if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf.data))->btpo_cycleid < MAX_BT_CYCLE_ID) { /* assume btree */ - RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked, blkno); + RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked.data, blkno); RmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno); - if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0) + if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0) { neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), forkNum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf_masked), + hexdump_page(mdbuf_masked.data), hexdump_page(pageserver_masked)); } } @@ -3542,77 +3547,85 @@ 
neon_readv(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, prefetch_pump_state(false); #ifdef DEBUG_COMPARE_LOCAL - if (forkNum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) + if (forknum == MAIN_FORKNUM && IS_LOCAL_REL(reln)) { char pageserver_masked[BLCKSZ]; - char mdbuf[BLCKSZ]; - char mdbuf_masked[BLCKSZ]; + PGIOAlignedBlock mdbuf; + PGIOAlignedBlock mdbuf_masked; + XLogRecPtr request_lsn = request_lsns->request_lsn; for (int i = 0; i < nblocks; i++) { + BlockNumber blkno = blocknum + i; + if (!BITMAP_ISSET(read, i)) + continue; + #if PG_MAJORVERSION_NUM >= 17 - mdreadv(reln, forkNum, blkno + i, &mdbuf, 1); + { + void* mdbuffers[1] = { mdbuf.data }; + mdreadv(reln, forknum, blkno, mdbuffers, 1); + } #else - mdread(reln, forkNum, blkno + i, mdbuf); + mdread(reln, forknum, blkno, mdbuf.data); #endif - memcpy(pageserver_masked, buffer, BLCKSZ); - memcpy(mdbuf_masked, mdbuf, BLCKSZ); + memcpy(pageserver_masked, buffers[i], BLCKSZ); + memcpy(mdbuf_masked.data, mdbuf.data, BLCKSZ); - if (PageIsNew((Page) mdbuf)) + if (PageIsNew((Page) mdbuf.data)) { if (!PageIsNew((Page) pageserver_masked)) { neon_log(PANIC, "page is new in MD but not in Page Server at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), - forkNum, + forknum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(buffer)); + hexdump_page(buffers[i])); } } - else if (PageIsNew((Page) buffer)) + else if (PageIsNew((Page) buffers[i])) { neon_log(PANIC, "page is new in Page Server but not in MD at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), - forkNum, + forknum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf)); + hexdump_page(mdbuf.data)); } - else if (PageGetSpecialSize(mdbuf) == 0) + else if (PageGetSpecialSize(mdbuf.data) == 0) { /* assume heap */ - RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked, blkno); + 
RmgrTable[RM_HEAP_ID].rm_mask(mdbuf_masked.data, blkno); RmgrTable[RM_HEAP_ID].rm_mask(pageserver_masked, blkno); - if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0) + if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0) { neon_log(PANIC, "heap buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), - forkNum, + forknum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf_masked), + hexdump_page(mdbuf_masked.data), hexdump_page(pageserver_masked)); } } - else if (PageGetSpecialSize(mdbuf) == MAXALIGN(sizeof(BTPageOpaqueData))) + else if (PageGetSpecialSize(mdbuf.data) == MAXALIGN(sizeof(BTPageOpaqueData))) { - if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf))->btpo_cycleid < MAX_BT_CYCLE_ID) + if (((BTPageOpaqueData *) PageGetSpecialPointer(mdbuf.data))->btpo_cycleid < MAX_BT_CYCLE_ID) { /* assume btree */ - RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked, blkno); + RmgrTable[RM_BTREE_ID].rm_mask(mdbuf_masked.data, blkno); RmgrTable[RM_BTREE_ID].rm_mask(pageserver_masked, blkno); - if (memcmp(mdbuf_masked, pageserver_masked, BLCKSZ) != 0) + if (memcmp(mdbuf_masked.data, pageserver_masked, BLCKSZ) != 0) { neon_log(PANIC, "btree buffers differ at blk %u in rel %u/%u/%u fork %u (request LSN %X/%08X):\n------ MD ------\n%s\n------ Page Server ------\n%s\n", blkno, RelFileInfoFmt(InfoFromSMgrRel(reln)), - forkNum, + forknum, (uint32) (request_lsn >> 32), (uint32) request_lsn, - hexdump_page(mdbuf_masked), + hexdump_page(mdbuf_masked.data), hexdump_page(pageserver_masked)); } } @@ -3664,6 +3677,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo switch (reln->smgr_relpersistence) { case 0: +#ifndef DEBUG_COMPARE_LOCAL /* This is a bit tricky. 
Check if the relation exists locally */ if (mdexists(reln, forknum)) { @@ -3682,6 +3696,7 @@ neon_write(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, const vo */ return; } +#endif break; case RELPERSISTENCE_PERMANENT: @@ -3732,6 +3747,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, switch (reln->smgr_relpersistence) { case 0: +#ifndef DEBUG_COMPARE_LOCAL /* This is a bit tricky. Check if the relation exists locally */ if (mdexists(reln, forknum)) { @@ -3747,6 +3763,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, */ return; } +#endif break; case RELPERSISTENCE_PERMANENT: @@ -3768,7 +3785,7 @@ neon_writev(SMgrRelation reln, ForkNumber forknum, BlockNumber blkno, #ifdef DEBUG_COMPARE_LOCAL if (IS_LOCAL_REL(reln)) - mdwritev(reln, forknum, blocknum, &buffer, 1, skipFsync); + mdwritev(reln, forknum, blkno, buffers, nblocks, skipFsync); #endif } From 7bf639733417e122b68e1d9dbfe23e71d23ae1b0 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 12 Mar 2025 10:23:41 +0000 Subject: [PATCH 155/207] pageserver: remove legacy `TimelineInfo::latest_gc_cutoff` field (1/2) (#11149) ## Problem This field was retained for backward compat only in https://github.com/neondatabase/neon/pull/10707. Once https://github.com/neondatabase/cloud/pull/25233 is released, nothing external will be reading this field. Internally, this was a mandatory field so storage controller is still trying to decode it, so we must do this removal in two steps: this PR makes the field optional, and after one release we can fully remove it. 
Related: https://github.com/neondatabase/cloud/issues/24250 ## Summary of changes - Rename field to `_unused` - Remove field from swagger - Make field optional --- libs/pageserver_api/src/models.rs | 7 ++++--- pageserver/src/http/openapi_spec.yml | 4 ---- pageserver/src/http/routes.rs | 5 +---- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index b1ebad83b1..5e5bcf5338 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -1225,9 +1225,10 @@ pub struct TimelineInfo { pub last_record_lsn: Lsn, pub prev_record_lsn: Option, - /// Legacy field for compat with control plane. Synonym of `min_readable_lsn`. - /// TODO: remove once control plane no longer reads it. - pub latest_gc_cutoff_lsn: Lsn, + /// Legacy field, retained for one version to enable old storage controller to + /// decode (it was a mandatory field). + #[serde(default, rename = "latest_gc_cutoff_lsn")] + pub _unused: Lsn, /// The LSN up to which GC has advanced: older data may still exist but it is not available for clients. 
/// This LSN is not suitable for deciding where to create branches etc: use [`TimelineInfo::min_readable_lsn`] instead, diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index 0fb9a240d5..e799efcce3 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -1079,7 +1079,6 @@ components: - last_record_lsn - disk_consistent_lsn - state - - latest_gc_cutoff_lsn properties: timeline_id: type: string @@ -1123,9 +1122,6 @@ components: min_readable_lsn: type: string format: hex - latest_gc_cutoff_lsn: - type: string - format: hex applied_gc_cutoff_lsn: type: string format: hex diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index e5848bfd25..ba5fb521ff 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -460,10 +460,7 @@ async fn build_timeline_info_common( initdb_lsn, last_record_lsn, prev_record_lsn: Some(timeline.get_prev_record_lsn()), - // Externally, expose the lowest LSN that can be used to create a branch as the "GC cutoff", although internally - // we distinguish between the "planned" GC cutoff (PITR point) and the "latest" GC cutoff (where we - // actually trimmed data to), which can pass each other when PITR is changed. - latest_gc_cutoff_lsn: min_readable_lsn, + _unused: Default::default(), // Unused, for legacy decode only min_readable_lsn, applied_gc_cutoff_lsn: *timeline.get_applied_gc_cutoff_lsn(), current_logical_size: current_logical_size.size_dont_care_about_accuracy(), From 1c0ff3c04d5432ba5f0295042fcca77118ebf49e Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 12 Mar 2025 11:07:49 +0000 Subject: [PATCH 156/207] utils: explicit OTEL export config and OTEL enablement via common entry point (#11139) We want to export performance traces from the pageserver in OTEL format. End goal is to see them in Grafana. To this end, there are two changes here: 1. 
Update the `tracing-utils` crate to allow for explicitly specifying the export configuration. Pageserver configuration is loaded from a file on start-up. This allows us to use the same flow for export configs there. 2. Update the `utils::logging::init` common entry point to set up OTEL tracing infrastructure if requested. Note that an entirely different tracing subscriber is used. This is to avoid interference with the existing tracing set-up. For now, no service uses this functionality. PR to plug this into the pageserver is [here](https://github.com/neondatabase/neon/pull/11140). Related https://github.com/neondatabase/neon/issues/9873 --- Cargo.lock | 2 + compute_tools/src/logger.rs | 3 +- libs/tracing-utils/src/lib.rs | 83 +++++++++++++++++++++++++--- libs/utils/Cargo.toml | 1 + libs/utils/src/logging.rs | 1 + pageserver/Cargo.toml | 1 + pageserver/compaction/tests/tests.rs | 2 +- pageserver/src/bin/pageserver.rs | 1 + pageserver/src/tenant.rs | 2 +- proxy/src/logging.rs | 3 +- 10 files changed, 87 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d023d340d4..dd13e5a833 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4313,6 +4313,7 @@ dependencies = [ "tokio-util", "toml_edit", "tracing", + "tracing-utils", "url", "utils", "uuid", @@ -7850,6 +7851,7 @@ dependencies = [ "tracing", "tracing-error", "tracing-subscriber", + "tracing-utils", "walkdir", ] diff --git a/compute_tools/src/logger.rs b/compute_tools/src/logger.rs index a65614e94e..c36f302f99 100644 --- a/compute_tools/src/logger.rs +++ b/compute_tools/src/logger.rs @@ -24,7 +24,8 @@ pub async fn init_tracing_and_logging(default_log_level: &str) -> anyhow::Result .with_writer(std::io::stderr); // Initialize OpenTelemetry - let otlp_layer = tracing_utils::init_tracing("compute_ctl").await; + let otlp_layer = + tracing_utils::init_tracing("compute_ctl", tracing_utils::ExportConfig::default()).await; // Put it all together tracing_subscriber::registry() diff --git 
a/libs/tracing-utils/src/lib.rs b/libs/tracing-utils/src/lib.rs index 72f94d61e4..74992a7d03 100644 --- a/libs/tracing-utils/src/lib.rs +++ b/libs/tracing-utils/src/lib.rs @@ -21,7 +21,7 @@ //! .with_writer(std::io::stderr); //! //! // Initialize OpenTelemetry. Exports tracing spans as OpenTelemetry traces -//! let otlp_layer = tracing_utils::init_tracing("my_application").await; +//! let otlp_layer = tracing_utils::init_tracing("my_application", tracing_utils::ExportConfig::default()).await; //! //! // Put it all together //! tracing_subscriber::registry() @@ -38,8 +38,12 @@ pub mod http; use opentelemetry::KeyValue; use opentelemetry::trace::TracerProvider; -use tracing::Subscriber; +use opentelemetry_otlp::WithExportConfig; +pub use opentelemetry_otlp::{ExportConfig, Protocol}; +use tracing::level_filters::LevelFilter; +use tracing::{Dispatch, Subscriber}; use tracing_subscriber::Layer; +use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::registry::LookupSpan; /// Set up OpenTelemetry exporter, using configuration from environment variables. @@ -69,19 +73,28 @@ use tracing_subscriber::registry::LookupSpan; /// /// This doesn't block, but is marked as 'async' to hint that this must be called in /// asynchronous execution context. -pub async fn init_tracing(service_name: &str) -> Option> +pub async fn init_tracing( + service_name: &str, + export_config: ExportConfig, +) -> Option> where S: Subscriber + for<'span> LookupSpan<'span>, { if std::env::var("OTEL_SDK_DISABLED") == Ok("true".to_string()) { return None; }; - Some(init_tracing_internal(service_name.to_string())) + Some(init_tracing_internal( + service_name.to_string(), + export_config, + )) } /// Like `init_tracing`, but creates a separate tokio Runtime for the tracing /// tasks. 
-pub fn init_tracing_without_runtime(service_name: &str) -> Option> +pub fn init_tracing_without_runtime( + service_name: &str, + export_config: ExportConfig, +) -> Option> where S: Subscriber + for<'span> LookupSpan<'span>, { @@ -112,16 +125,22 @@ where )); let _guard = runtime.enter(); - Some(init_tracing_internal(service_name.to_string())) + Some(init_tracing_internal( + service_name.to_string(), + export_config, + )) } -fn init_tracing_internal(service_name: String) -> impl Layer +fn init_tracing_internal(service_name: String, export_config: ExportConfig) -> impl Layer where S: Subscriber + for<'span> LookupSpan<'span>, { - // Sets up exporter from the OTEL_EXPORTER_* environment variables. + // Sets up exporter from the provided [`ExportConfig`] parameter. + // If the endpoint is not specified, it is loaded from the + // OTEL_EXPORTER_OTLP_ENDPOINT environment variable. let exporter = opentelemetry_otlp::SpanExporter::builder() .with_http() + .with_export_config(export_config) .build() .expect("could not initialize opentelemetry exporter"); @@ -151,3 +170,51 @@ where pub fn shutdown_tracing() { opentelemetry::global::shutdown_tracer_provider(); } + +pub enum OtelEnablement { + Disabled, + Enabled { + service_name: String, + export_config: ExportConfig, + runtime: &'static tokio::runtime::Runtime, + }, +} + +pub struct OtelGuard { + pub dispatch: Dispatch, +} + +impl Drop for OtelGuard { + fn drop(&mut self) { + shutdown_tracing(); + } +} + +/// Initializes OTEL infrastructure for performance tracing according to the provided configuration +/// +/// Performance tracing is handled by a different [`tracing::Subscriber`]. This function returns +/// an [`OtelGuard`] containing a [`tracing::Dispatch`] associated with a newly created subscriber. +/// Applications should use this dispatch for their performance traces. +/// +/// The lifetime of the guard should match that of the application. On drop, it tears down the +/// OTEL infra.
+pub fn init_performance_tracing(otel_enablement: OtelEnablement) -> Option { + let otel_subscriber = match otel_enablement { + OtelEnablement::Disabled => None, + OtelEnablement::Enabled { + service_name, + export_config, + runtime, + } => { + let otel_layer = runtime + .block_on(init_tracing(&service_name, export_config)) + .with_filter(LevelFilter::INFO); + let otel_subscriber = tracing_subscriber::registry().with(otel_layer); + let otel_dispatch = Dispatch::new(otel_subscriber); + + Some(otel_dispatch) + } + }; + + otel_subscriber.map(|dispatch| OtelGuard { dispatch }) +} diff --git a/libs/utils/Cargo.toml b/libs/utils/Cargo.toml index ac44300a51..4180602ac7 100644 --- a/libs/utils/Cargo.toml +++ b/libs/utils/Cargo.toml @@ -42,6 +42,7 @@ toml_edit = { workspace = true, features = ["serde"] } tracing.workspace = true tracing-error.workspace = true tracing-subscriber = { workspace = true, features = ["json", "registry"] } +tracing-utils.workspace = true rand.workspace = true scopeguard.workspace = true strum.workspace = true diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index 881f1e765d..f37f05692a 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -165,6 +165,7 @@ pub fn init( }; log_layer.with_filter(rust_log_env_filter()) }); + let r = r.with( TracingEventCountLayer(&TRACING_EVENT_COUNT_METRIC).with_filter(rust_log_env_filter()), ); diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index a372be5044..d17a19ce65 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -70,6 +70,7 @@ tokio-stream.workspace = true tokio-util.workspace = true toml_edit = { workspace = true, features = [ "serde" ] } tracing.workspace = true +tracing-utils.workspace = true url.workspace = true walkdir.workspace = true metrics.workspace = true diff --git a/pageserver/compaction/tests/tests.rs b/pageserver/compaction/tests/tests.rs index bd8b54a286..565f66ce1a 100644 --- a/pageserver/compaction/tests/tests.rs +++ 
b/pageserver/compaction/tests/tests.rs @@ -12,7 +12,7 @@ pub(crate) fn setup_logging() { logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, logging::Output::Stdout, ) - .expect("Failed to init test logging") + .expect("Failed to init test logging"); }); } diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index c4af0d5d41..4d30a6358b 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -111,6 +111,7 @@ fn main() -> anyhow::Result<()> { } else { TracingErrorLayerEnablement::Disabled }; + logging::init( conf.log_format, tracing_error_layer_enablement, diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 62e1cdac0c..2bce56345a 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -5754,7 +5754,7 @@ pub(crate) mod harness { logging::TracingErrorLayerEnablement::EnableWithRustLogFilter, logging::Output::Stdout, ) - .expect("Failed to init test logging") + .expect("Failed to init test logging"); }); } diff --git a/proxy/src/logging.rs b/proxy/src/logging.rs index 6f9845fd6e..454fe81357 100644 --- a/proxy/src/logging.rs +++ b/proxy/src/logging.rs @@ -46,7 +46,8 @@ pub async fn init() -> anyhow::Result { .expect("this should be a valid filter directive"), ); - let otlp_layer = tracing_utils::init_tracing("proxy").await; + let otlp_layer = + tracing_utils::init_tracing("proxy", tracing_utils::ExportConfig::default()).await; let json_log_layer = if logfmt == LogFormat::Json { Some(JsonLoggingLayer::new( From 73e37ae388f4c5b6473b89baa3ba5a919c06e548 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Wed, 12 Mar 2025 17:23:31 +0400 Subject: [PATCH 157/207] Suppress "request was dropped" errors in test_timeline_archive (#11190) ## Problem Test `test_timeline_archive` is flaky because it makes requests that are intended to fail. It sometimes leads to warning in pageserver's logs. More details are in the issue. 
- Closes: https://github.com/neondatabase/neon/issues/11177 ## Summary of changes - Suppress such errors. --- test_runner/regress/test_timeline_archive.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index c17840d31c..11567cafd0 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -42,6 +42,14 @@ def test_timeline_archive(neon_env_builder: NeonEnvBuilder, shard_count: int): # If we run the unsharded version, talk to the storage controller ps_http = env.storage_controller.pageserver_api() + for ps in env.pageservers: + # We make /archival_config requests that are intended to fail. + # It's expected that storcon drops requests to other pageservers after + # it gets the first error (https://github.com/neondatabase/neon/issues/11177) + ps.allowed_errors.append( + ".*WARN.* path=/v1/tenant/.*/archival_config .*request was dropped before completing", + ) + # first try to archive a non existing timeline for an existing tenant: invalid_timeline_id = TimelineId.generate() with pytest.raises(PageserverApiException, match="timeline not found") as exc: From 7015dbbdf001bff6670a2027fb609083dcd06e10 Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 12 Mar 2025 14:02:11 +0000 Subject: [PATCH 158/207] storcon_cli: remove pre-warm helper (#11183) ## Problem This command was used when onboarding tenants to the storage controller. We no longer do that, so the command can go. 
## Summary of changes - Remove `storcon_cli tenant-warmup` command --- control_plane/storcon_cli/src/main.rs | 98 +-------------------------- 1 file changed, 2 insertions(+), 96 deletions(-) diff --git a/control_plane/storcon_cli/src/main.rs b/control_plane/storcon_cli/src/main.rs index b5c4f21e97..ae4bf9a519 100644 --- a/control_plane/storcon_cli/src/main.rs +++ b/control_plane/storcon_cli/src/main.rs @@ -14,8 +14,8 @@ use pageserver_api::controller_api::{ TenantShardMigrateRequest, TenantShardMigrateResponse, }; use pageserver_api::models::{ - EvictionPolicy, EvictionPolicyLayerAccessThreshold, LocationConfigSecondary, ShardParameters, - TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest, + EvictionPolicy, EvictionPolicyLayerAccessThreshold, ShardParameters, TenantConfig, + TenantConfigPatchRequest, TenantConfigRequest, TenantShardSplitRequest, TenantShardSplitResponse, }; use pageserver_api::shard::{ShardStripeSize, TenantShardId}; @@ -158,12 +158,6 @@ enum Command { #[arg(long)] tenant_id: TenantId, }, - /// For a tenant which hasn't been onboarded to the storage controller yet, add it in secondary - /// mode so that it can warm up content on a pageserver. - TenantWarmup { - #[arg(long)] - tenant_id: TenantId, - }, TenantSetPreferredAz { #[arg(long)] tenant_id: TenantId, @@ -871,94 +865,6 @@ async fn main() -> anyhow::Result<()> { ) .await?; } - Command::TenantWarmup { tenant_id } => { - let describe_response = storcon_client - .dispatch::<(), TenantDescribeResponse>( - Method::GET, - format!("control/v1/tenant/{tenant_id}"), - None, - ) - .await; - match describe_response { - Ok(describe) => { - if matches!(describe.policy, PlacementPolicy::Secondary) { - // Fine: it's already known to controller in secondary mode: calling - // again to put it into secondary mode won't cause problems. 
- } else { - anyhow::bail!("Tenant already present with policy {:?}", describe.policy); - } - } - Err(mgmt_api::Error::ApiError(StatusCode::NOT_FOUND, _)) => { - // Fine: this tenant isn't know to the storage controller yet. - } - Err(e) => { - // Unexpected API error - return Err(e.into()); - } - } - - vps_client - .location_config( - TenantShardId::unsharded(tenant_id), - pageserver_api::models::LocationConfig { - mode: pageserver_api::models::LocationConfigMode::Secondary, - generation: None, - secondary_conf: Some(LocationConfigSecondary { warm: true }), - shard_number: 0, - shard_count: 0, - shard_stripe_size: ShardParameters::DEFAULT_STRIPE_SIZE.0, - tenant_conf: TenantConfig::default(), - }, - None, - true, - ) - .await?; - - let describe_response = storcon_client - .dispatch::<(), TenantDescribeResponse>( - Method::GET, - format!("control/v1/tenant/{tenant_id}"), - None, - ) - .await?; - - let secondary_ps_id = describe_response - .shards - .first() - .unwrap() - .node_secondary - .first() - .unwrap(); - - println!("Tenant {tenant_id} warming up on pageserver {secondary_ps_id}"); - loop { - let (status, progress) = vps_client - .tenant_secondary_download( - TenantShardId::unsharded(tenant_id), - Some(Duration::from_secs(10)), - ) - .await?; - println!( - "Progress: {}/{} layers, {}/{} bytes", - progress.layers_downloaded, - progress.layers_total, - progress.bytes_downloaded, - progress.bytes_total - ); - match status { - StatusCode::OK => { - println!("Download complete"); - break; - } - StatusCode::ACCEPTED => { - // Loop - } - _ => { - anyhow::bail!("Unexpected download status: {status}"); - } - } - } - } Command::TenantDrop { tenant_id, unclean } => { if !unclean { anyhow::bail!( From fc515e7be2d7efb5627b0119ec293dd31c48007a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Wed, 12 Mar 2025 15:26:52 +0100 Subject: [PATCH 159/207] chore(deps): bump env_logger to 0.11.7 (#11188) ## Problem `humantime` is unmaintained, we want to migrate 
to `jiff`, see https://github.com/neondatabase/neon/issues/11179. `env_logger` in older versions depend on `humantime`, and newer versions depend on `jiff`, so we need to update it. ## Summary of changes Update `env_logger` to the most recent release, which does not depend on `humantime` anymore. --- Cargo.lock | 180 +++++++++++++++++++++----------------- Cargo.toml | 2 +- workspace_hack/Cargo.toml | 3 + 3 files changed, 103 insertions(+), 82 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dd13e5a833..1721c185f0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -191,7 +191,7 @@ checksum = "965c2d33e53cb6b267e148a4cb0760bc01f4904c1cd4bb4002a085bb016d1490" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", "synstructure", ] @@ -203,7 +203,7 @@ checksum = "7b18050c2cd6fe86c3a76584ef5e0baf286d038cda203eb6223df2cc413565f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -272,7 +272,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -283,7 +283,7 @@ checksum = "b9ccdd8f2a161be9bd5c023df56f1b2a0bd1d83872ae53b71a84a12c9bf6e842" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1021,7 +1021,7 @@ dependencies = [ "regex", "rustc-hash 2.1.1", "shlex", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1248,7 +1248,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1703,7 +1703,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1727,7 +1727,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1738,7 +1738,7 @@ checksum = "29a358ff9f12ec09c3e61fef9b5a9902623a695a46a917b07f269bff1445611a" dependencies = [ 
"darling_core", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1888,7 +1888,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1908,7 +1908,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25" dependencies = [ - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1937,7 +1937,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -1960,7 +1960,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -2105,7 +2105,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -2115,28 +2115,19 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0" dependencies = [ "log", -] - -[[package]] -name = "env_logger" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd405aab171cb85d6735e5c8d9db038c17d3ca007a4d2c25f337935c3d90580" -dependencies = [ - "humantime", - "is-terminal", - "log", "regex", - "termcolor", ] [[package]] name = "env_logger" -version = "0.11.2" +version = "0.11.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c012a26a7f605efc424dd53697843a72be7dc86ad2d01f7814337794a12231d" +checksum = "c3716d7a920fb4fac5d84e9d4bce8ceb321e9414b4409da61b07b75c1e3d0697" dependencies = [ + "anstream", + "anstyle", "env_filter", + "jiff", "log", ] @@ -2157,7 +2148,7 @@ checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -2417,7 +2408,7 @@ checksum = 
"162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -2530,7 +2521,7 @@ checksum = "53010ccb100b96a67bc32c0175f0ed1426b31b655d562898e57325f81c023ac0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -3148,7 +3139,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -3241,7 +3232,7 @@ dependencies = [ "crossbeam-channel", "crossbeam-utils", "dashmap 6.1.0", - "env_logger 0.11.2", + "env_logger", "indexmap 2.0.1", "itoa", "log", @@ -3364,6 +3355,30 @@ dependencies = [ "tracing", ] +[[package]] +name = "jiff" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d699bc6dfc879fb1bf9bdff0d4c56f0884fc6f0d0eb0fba397a6d00cd9a6b85e" +dependencies = [ + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", +] + +[[package]] +name = "jiff-static" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d16e75759ee0aa64c57a56acbf43916987b20c77373cb7e808979e02b93c9f9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "jobserver" version = "0.1.32" @@ -3535,9 +3550,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e" [[package]] name = "lru" @@ -3618,7 +3633,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -4491,7 +4506,7 @@ dependencies = [ "parquet", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -4593,7 +4608,7 @@ checksum = 
"f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -4689,6 +4704,15 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +[[package]] +name = "portable-atomic-util" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +dependencies = [ + "portable-atomic", +] + [[package]] name = "postgres" version = "0.19.7" @@ -4796,7 +4820,7 @@ dependencies = [ "bytes", "crc32c", "criterion", - "env_logger 0.10.2", + "env_logger", "log", "memoffset 0.9.0", "once_cell", @@ -4895,7 +4919,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -4909,9 +4933,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" dependencies = [ "unicode-ident", ] @@ -4995,7 +5019,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.90", + "syn 2.0.100", "tempfile", ] @@ -5016,7 +5040,7 @@ dependencies = [ "prost 0.13.3", "prost-types 0.13.3", "regex", - "syn 2.0.90", + "syn 2.0.100", "tempfile", ] @@ -5030,7 +5054,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -5043,7 +5067,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -5090,7 +5114,7 @@ dependencies = [ "consumption_metrics", "ecdsa 
0.16.9", "ed25519-dalek", - "env_logger 0.10.2", + "env_logger", "fallible-iterator", "flate2", "framed-websockets", @@ -5227,9 +5251,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.37" +version = "1.0.39" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" +checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801" dependencies = [ "proc-macro2", ] @@ -5758,7 +5782,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.90", + "syn 2.0.100", "unicode-ident", ] @@ -5973,7 +5997,7 @@ dependencies = [ "crc32c", "criterion", "desim", - "env_logger 0.10.2", + "env_logger", "fail", "futures", "hex", @@ -6304,7 +6328,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6386,7 +6410,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6788,7 +6812,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6839,9 +6863,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.90" +version = "2.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" +checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" dependencies = [ "proc-macro2", "quote", @@ -6871,7 +6895,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6922,15 +6946,6 @@ dependencies = [ "serde_json", ] -[[package]] -name = "termcolor" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" 
-dependencies = [ - "winapi-util", -] - [[package]] name = "test-context" version = "0.3.0" @@ -6949,7 +6964,7 @@ checksum = "78ea17a2dc368aeca6f554343ced1b1e31f76d63683fa8016e5844bd7a5144a1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6978,7 +6993,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -6989,7 +7004,7 @@ checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -7172,7 +7187,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -7405,7 +7420,7 @@ dependencies = [ "prost-build 0.13.3", "prost-types 0.13.3", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -7520,7 +7535,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -7915,7 +7930,7 @@ dependencies = [ "anyhow", "camino-tempfile", "clap", - "env_logger 0.10.2", + "env_logger", "log", "postgres", "postgres_ffi", @@ -8020,7 +8035,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", "wasm-bindgen-shared", ] @@ -8054,7 +8069,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -8361,6 +8376,7 @@ name = "workspace_hack" version = "0.1.0" dependencies = [ "ahash", + "anstream", "anyhow", "base64 0.13.1", "base64 0.21.7", @@ -8377,6 +8393,8 @@ dependencies = [ "digest", "displaydoc", "either", + "env_filter", + "env_logger", "fail", "form_urlencoded", "futures-channel", @@ -8429,7 +8447,7 @@ 
dependencies = [ "spki 0.7.3", "stable_deref_trait", "subtle", - "syn 2.0.90", + "syn 2.0.100", "sync_wrapper 0.1.2", "tikv-jemalloc-ctl", "tikv-jemalloc-sys", @@ -8546,7 +8564,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", "synstructure", ] @@ -8568,7 +8586,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -8588,7 +8606,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", "synstructure", ] @@ -8610,7 +8628,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] @@ -8632,7 +8650,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.90", + "syn 2.0.100", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 4a32b6d95d..7b86a64e9a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -221,7 +221,7 @@ zerocopy = { version = "0.7", features = ["derive"] } json-structural-diff = { version = "0.2.0" } ## TODO replace this with tracing -env_logger = "0.10" +env_logger = "0.11" log = "0.4" ## Libraries from neondatabase/ git forks, ideally with changes to be upstreamed diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index 183cc66ab9..f1696c5ff9 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -16,6 +16,7 @@ license.workspace = true ### BEGIN HAKARI SECTION [dependencies] ahash = { version = "0.8" } +anstream = { version = "0.6" } anyhow = { version = "1", features = ["backtrace"] } base64-594e8ee84c453af0 = { package = "base64", version = "0.13", features = ["alloc"] } base64-647d43efb71741da = { package = 
"base64", version = "0.21" } @@ -30,6 +31,8 @@ der = { version = "0.7", default-features = false, features = ["oid", "pem", "st deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] } digest = { version = "0.10", features = ["mac", "oid", "std"] } either = { version = "1" } +env_filter = { version = "0.1", default-features = false, features = ["regex"] } +env_logger = { version = "0.11" } fail = { version = "0.5", default-features = false, features = ["failpoints"] } form_urlencoded = { version = "1" } futures-channel = { version = "0.3", features = ["sink"] } From 1436b8469c6fe52f37fb9ba41a3f854313cacd79 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 12 Mar 2025 15:34:29 +0100 Subject: [PATCH 160/207] pageserver: appease unused lint on macOS (#11192) ## Problem `info_span!` is only used in a `linux` branch, causing the unused lint to fire on macOS. ## Summary of changes Fully qualify the `info_span!` use. --- pageserver/src/tenant/remote_timeline_client/download.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pageserver/src/tenant/remote_timeline_client/download.rs b/pageserver/src/tenant/remote_timeline_client/download.rs index 954ff0c1d6..0001f67c99 100644 --- a/pageserver/src/tenant/remote_timeline_client/download.rs +++ b/pageserver/src/tenant/remote_timeline_client/download.rs @@ -18,7 +18,7 @@ use tokio::fs::{self, File, OpenOptions}; use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio_util::io::StreamReader; use tokio_util::sync::CancellationToken; -use tracing::{info_span, warn}; +use tracing::warn; use utils::crashsafe::path_with_suffix_extension; use utils::id::{TenantId, TimelineId}; use utils::{backoff, pausable_failpoint}; @@ -229,7 +229,7 @@ async fn download_object( || IoBufferMut::with_capacity(super::BUFFER_SIZE), gate.enter().map_err(|_| DownloadError::Cancelled)?, ctx, - info_span!(parent: None, "download_object_buffered_writer", %dst_path), + 
tracing::info_span!(parent: None, "download_object_buffered_writer", %dst_path), ); // TODO: use vectored write (writev) once supported by tokio-epoll-uring. From c7717c85c7a45144b016e5d799125197ba14c001 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 12 Mar 2025 16:16:54 +0100 Subject: [PATCH 161/207] storcon,pageserver: use persisted stripe size when loading unsharded tenants (#11193) ## Problem When the storage controller and Pageserver loads tenants from persisted storage, it uses `ShardIdentity::unsharded()` for unsharded tenants. However, this replaces the persisted stripe size of unsharded tenants with the default stripe size. This doesn't really matter for practical purposes, since the stripe size is meaningless for unsharded tenants anyway, but can cause consistency check failures if the persisted stripe size differs from the default. This was seen in #11168, where we change the default stripe size. Touches #11168. ## Summary of changes Carry over the persisted stripe size from `TenantShardPersistence` for unsharded tenants, and from `LocationConf` on Pageservers. Also add bounds checks for type casts when loading persisted shard metadata. --- libs/pageserver_api/src/shard.rs | 10 +++++++ pageserver/src/tenant/config.rs | 6 +++- storage_controller/src/persistence.rs | 40 ++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/libs/pageserver_api/src/shard.rs b/libs/pageserver_api/src/shard.rs index eca04b1f3d..8386d6e586 100644 --- a/libs/pageserver_api/src/shard.rs +++ b/libs/pageserver_api/src/shard.rs @@ -112,6 +112,16 @@ impl ShardIdentity { } } + /// An unsharded identity with the given stripe size (if non-zero). This is typically used to + /// carry over a stripe size for an unsharded tenant from persistent storage. 
+ pub fn unsharded_with_stripe_size(stripe_size: ShardStripeSize) -> Self { + let mut shard_identity = Self::unsharded(); + if stripe_size.0 > 0 { + shard_identity.stripe_size = stripe_size; + } + shard_identity + } + /// A broken instance of this type is only used for `TenantState::Broken` tenants, /// which are constructed in code paths that don't have access to proper configuration. /// diff --git a/pageserver/src/tenant/config.rs b/pageserver/src/tenant/config.rs index 334fb04604..4308db84e5 100644 --- a/pageserver/src/tenant/config.rs +++ b/pageserver/src/tenant/config.rs @@ -219,7 +219,11 @@ impl LocationConf { }; let shard = if conf.shard_count == 0 { - ShardIdentity::unsharded() + // NB: carry over the persisted stripe size instead of using the default. This doesn't + // matter for most practical purposes, since unsharded tenants don't use the stripe + // size, but can cause inconsistencies between storcon and Pageserver and cause manual + // splits without `new_stripe_size` to use an unintended stripe size. 
+ ShardIdentity::unsharded_with_stripe_size(ShardStripeSize(conf.shard_stripe_size)) } else { ShardIdentity::new( ShardNumber(conf.shard_number), diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 5146fe472e..4a97aac125 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -1613,23 +1613,49 @@ pub(crate) struct TenantShardPersistence { } impl TenantShardPersistence { + fn get_shard_count(&self) -> Result { + self.shard_count + .try_into() + .map(ShardCount) + .map_err(|_| ShardConfigError::InvalidCount) + } + + fn get_shard_number(&self) -> Result { + self.shard_number + .try_into() + .map(ShardNumber) + .map_err(|_| ShardConfigError::InvalidNumber) + } + + fn get_stripe_size(&self) -> Result { + self.shard_stripe_size + .try_into() + .map(ShardStripeSize) + .map_err(|_| ShardConfigError::InvalidStripeSize) + } + pub(crate) fn get_shard_identity(&self) -> Result { if self.shard_count == 0 { - Ok(ShardIdentity::unsharded()) + // NB: carry over the stripe size from the persisted record, to avoid consistency check + // failures if the persisted value differs from the default stripe size. The stripe size + // doesn't really matter for unsharded tenants anyway. + Ok(ShardIdentity::unsharded_with_stripe_size( + self.get_stripe_size()?, + )) } else { Ok(ShardIdentity::new( - ShardNumber(self.shard_number as u8), - ShardCount::new(self.shard_count as u8), - ShardStripeSize(self.shard_stripe_size as u32), + self.get_shard_number()?, + self.get_shard_count()?, + self.get_stripe_size()?, )?) 
} } - pub(crate) fn get_tenant_shard_id(&self) -> Result { + pub(crate) fn get_tenant_shard_id(&self) -> anyhow::Result { Ok(TenantShardId { tenant_id: TenantId::from_str(self.tenant_id.as_str())?, - shard_number: ShardNumber(self.shard_number as u8), - shard_count: ShardCount::new(self.shard_count as u8), + shard_number: self.get_shard_number()?, + shard_count: self.get_shard_count()?, }) } } From 02a83913eccd6488216139ddc151a06f354b4f1a Mon Sep 17 00:00:00 2001 From: Vlad Lazar Date: Wed, 12 Mar 2025 15:31:28 +0000 Subject: [PATCH 162/207] storcon: do not update observed state on node activation (#11155) ## Problem When a node becomes active, we query its locations and update the observed state in-place. This can race with the observed state updates done when processing reconcile results. ## Summary of changes The argument for this reconciliation step is that is reduces the need for background reconciliations. I don't think is actually true anymore. There's two cases. 1. Restart of node after drain. Usually the node does not go through the offline state here, so observed locations were not marked as none. In any case, there should be a handful of shards max on the node since we've just drained it. 2. Node comes back online after failure or network partition. When the node is marked offline, we reschedule everything away from it. When it later becomes active, the previous observed location is extraneous and requires a reconciliation anyway. 
Closes https://github.com/neondatabase/neon/issues/11148 --- control_plane/src/bin/neon_local.rs | 14 +++++++--- control_plane/src/storage_controller.rs | 7 ++++- storage_controller/src/service.rs | 26 ++++++++++++++++--- .../regress/test_storage_controller.py | 11 +++++--- test_runner/regress/test_tenant_size.py | 2 +- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index ba1411b615..72ebbafd3b 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -36,7 +36,9 @@ use pageserver_api::config::{ use pageserver_api::controller_api::{ NodeAvailabilityWrapper, PlacementPolicy, TenantCreateRequest, }; -use pageserver_api::models::{ShardParameters, TimelineCreateRequest, TimelineInfo}; +use pageserver_api::models::{ + ShardParameters, TenantConfigRequest, TimelineCreateRequest, TimelineInfo, +}; use pageserver_api::shard::{ShardCount, ShardStripeSize, TenantShardId}; use postgres_backend::AuthType; use postgres_connection::parse_host_port; @@ -1129,12 +1131,16 @@ async fn handle_tenant(subcmd: &TenantCmd, env: &mut local_env::LocalEnv) -> any let tenant_id = get_tenant_id(args.tenant_id, env)?; let tenant_conf: HashMap<_, _> = args.config.iter().flat_map(|c| c.split_once(':')).collect(); + let config = PageServerNode::parse_config(tenant_conf)?; - pageserver - .tenant_config(tenant_id, tenant_conf) + let req = TenantConfigRequest { tenant_id, config }; + + let storage_controller = StorageController::from_env(env); + storage_controller + .set_tenant_config(&req) .await .with_context(|| format!("Tenant config failed for tenant with id {tenant_id}"))?; - println!("tenant {tenant_id} successfully configured on the pageserver"); + println!("tenant {tenant_id} successfully configured via storcon"); } } Ok(()) diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index 439d7936a7..bbd7f67720 100644 --- 
a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -14,7 +14,7 @@ use pageserver_api::controller_api::{ NodeConfigureRequest, NodeDescribeResponse, NodeRegisterRequest, TenantCreateRequest, TenantCreateResponse, TenantLocateResponse, }; -use pageserver_api::models::{TimelineCreateRequest, TimelineInfo}; +use pageserver_api::models::{TenantConfigRequest, TimelineCreateRequest, TimelineInfo}; use pageserver_api::shard::TenantShardId; use pageserver_client::mgmt_api::ResponseErrorMessageExt; use postgres_backend::AuthType; @@ -878,4 +878,9 @@ impl StorageController { ) .await } + + pub async fn set_tenant_config(&self, req: &TenantConfigRequest) -> anyhow::Result<()> { + self.dispatch(Method::PUT, "v1/tenant/config".to_string(), Some(req)) + .await + } } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 96b67fa81e..667b53b725 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -2004,21 +2004,41 @@ impl Service { tracing::info!("Loaded {} LocationConfigs", configs.tenant_shards.len()); let mut cleanup = Vec::new(); + let mut mismatched_locations = 0; { let mut locked = self.inner.write().unwrap(); - for (tenant_shard_id, observed_loc) in configs.tenant_shards { + for (tenant_shard_id, reported) in configs.tenant_shards { let Some(tenant_shard) = locked.tenants.get_mut(&tenant_shard_id) else { cleanup.push(tenant_shard_id); continue; }; - tenant_shard + + let on_record = &mut tenant_shard .observed .locations - .insert(node.get_id(), ObservedStateLocation { conf: observed_loc }); + .entry(node.get_id()) + .or_insert_with(|| ObservedStateLocation { conf: None }) + .conf; + + // If the location reported by the node does not match our observed state, + // then we mark it as uncertain and let the background reconciliation loop + // deal with it. + // + // Note that this also covers net new locations reported by the node. 
+ if *on_record != reported { + mismatched_locations += 1; + *on_record = None; + } } } + if mismatched_locations > 0 { + tracing::info!( + "Set observed state to None for {mismatched_locations} mismatched locations" + ); + } + for tenant_shard_id in cleanup { tracing::info!("Detaching {tenant_shard_id}"); match node diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 29919f2fe7..5eaf69cfa1 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -1749,18 +1749,23 @@ def test_storage_controller_re_attach(neon_env_builder: NeonEnvBuilder): # Restart the failed pageserver victim_ps.start() + env.storage_controller.reconcile_until_idle() + # We expect that the re-attach call correctly tipped off the pageserver that its locations # are all secondaries now. locations = victim_ps.http_client().tenant_list_locations()["tenant_shards"] assert len(locations) == 2 assert all(loc[1]["mode"] == "Secondary" for loc in locations) - # We expect that this situation resulted from the re_attach call, and not any explicit - # Reconciler runs: assert that the reconciliation count has not gone up since we restarted. 
+ # We expect that this situation resulted from background reconciliations + # Reconciler runs: assert that the reconciliation count has gone up by exactly + # one for each shard reconciles_after_restart = env.storage_controller.get_metric_value( "storage_controller_reconcile_complete_total", filter={"status": "ok"} ) - assert reconciles_after_restart == reconciles_before_restart + + assert reconciles_before_restart is not None + assert reconciles_after_restart == reconciles_before_restart + 2 def test_storage_controller_shard_scheduling_policy(neon_env_builder: NeonEnvBuilder): diff --git a/test_runner/regress/test_tenant_size.py b/test_runner/regress/test_tenant_size.py index 713f89c60f..81e727a3aa 100644 --- a/test_runner/regress/test_tenant_size.py +++ b/test_runner/regress/test_tenant_size.py @@ -436,7 +436,7 @@ def test_single_branch_get_tenant_size_grows( # when our tenant is configured with a tiny pitr interval, dropping a table should # cause synthetic size to go down immediately tenant_config["pitr_interval"] = "0s" - env.pageserver.http_client().set_tenant_config(tenant_id, tenant_config) + env.storage_controller.pageserver_api().set_tenant_config(tenant_id, tenant_config) (current_lsn, size) = get_current_consistent_size( env, endpoint, size_debug_file, http_client, tenant_id, timeline_id ) From 40672b739eb26b154d973be218b817727e0e9a91 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Wed, 12 Mar 2025 10:34:46 -0500 Subject: [PATCH 163/207] Move maybe_add_request_id_header middleware into middleware module (#11187) This matches the authorization middleware. 
--------- Signed-off-by: Tristan Partin Co-authored-by: Mikhail Kot --- compute_tools/src/http/middleware/mod.rs | 1 + .../src/http/middleware/request_id.rs | 16 ++++++++++++++++ compute_tools/src/http/server.rs | 19 +++---------------- 3 files changed, 20 insertions(+), 16 deletions(-) create mode 100644 compute_tools/src/http/middleware/request_id.rs diff --git a/compute_tools/src/http/middleware/mod.rs b/compute_tools/src/http/middleware/mod.rs index caeeeedfe5..147d6d2c7d 100644 --- a/compute_tools/src/http/middleware/mod.rs +++ b/compute_tools/src/http/middleware/mod.rs @@ -1 +1,2 @@ pub(in crate::http) mod authorize; +pub(in crate::http) mod request_id; diff --git a/compute_tools/src/http/middleware/request_id.rs b/compute_tools/src/http/middleware/request_id.rs new file mode 100644 index 0000000000..e685b27d91 --- /dev/null +++ b/compute_tools/src/http/middleware/request_id.rs @@ -0,0 +1,16 @@ +use axum::{extract::Request, middleware::Next, response::Response}; +use uuid::Uuid; + +use crate::http::headers::X_REQUEST_ID; + +/// This middleware function allows compute_ctl to generate its own request ID +/// if one isn't supplied. The control plane will always send one as a UUID. The +/// neon Postgres extension on the other hand does not send one. 
+pub async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response { + let headers = request.headers_mut(); + if !headers.contains_key(X_REQUEST_ID) { + headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap()); + } + + next.run(request).await +} diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index 126fa86d1c..b70b6c619c 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -5,9 +5,8 @@ use std::time::Duration; use anyhow::Result; use axum::Router; -use axum::extract::Request; -use axum::middleware::{self, Next}; -use axum::response::{IntoResponse, Response}; +use axum::middleware::{self}; +use axum::response::IntoResponse; use axum::routing::{get, post}; use http::StatusCode; use jsonwebtoken::jwk::JwkSet; @@ -17,8 +16,8 @@ use tower_http::{ auth::AsyncRequireAuthorizationLayer, request_id::PropagateRequestIdLayer, trace::TraceLayer, }; use tracing::{Span, error, info}; -use uuid::Uuid; +use super::middleware::request_id::maybe_add_request_id_header; use super::{ headers::X_REQUEST_ID, middleware::authorize::Authorize, @@ -219,15 +218,3 @@ impl Server { tokio::spawn(self.serve(state)); } } - -/// This middleware function allows compute_ctl to generate its own request ID -/// if one isn't supplied. The control plane will always send one as a UUID. The -/// neon Postgres extension on the other hand does not send one. 
-async fn maybe_add_request_id_header(mut request: Request, next: Next) -> Response { - let headers = request.headers_mut(); - if headers.get(X_REQUEST_ID).is_none() { - headers.append(X_REQUEST_ID, Uuid::new_v4().to_string().parse().unwrap()); - } - - next.run(request).await -} From 7aec1364dd4cfa26571abcf0cac79e663260ca6a Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Wed, 12 Mar 2025 15:47:17 +0000 Subject: [PATCH 164/207] chore(proxy): remove enum and composite type queries (#11178) In our json encoding, we only need to know about array types. Information about composites or enums are not actually used. Enums are quite popular, needing to type query them when not needed can add some latency cost for no gain. --- libs/proxy/postgres-types2/src/lib.rs | 10 +- libs/proxy/tokio-postgres2/src/client.rs | 119 ++-------------- .../tokio-postgres2/src/generic_client.rs | 10 +- libs/proxy/tokio-postgres2/src/prepare.rs | 133 ++++-------------- libs/proxy/tokio-postgres2/src/transaction.rs | 5 + proxy/src/serverless/sql_over_http.rs | 16 ++- 6 files changed, 69 insertions(+), 224 deletions(-) diff --git a/libs/proxy/postgres-types2/src/lib.rs b/libs/proxy/postgres-types2/src/lib.rs index 0ccd8c295f..b6bcabc922 100644 --- a/libs/proxy/postgres-types2/src/lib.rs +++ b/libs/proxy/postgres-types2/src/lib.rs @@ -135,8 +135,8 @@ impl Type { pub enum Kind { /// A simple type like `VARCHAR` or `INTEGER`. Simple, - /// An enumerated type along with its variants. - Enum(Vec), + /// An enumerated type. + Enum, /// A pseudo-type. Pseudo, /// An array type along with the type of its elements. @@ -146,9 +146,9 @@ pub enum Kind { /// A multirange type along with the type of its elements. Multirange(Type), /// A domain type along with its underlying type. - Domain(Type), - /// A composite type along with information about its fields. - Composite(Vec), + Domain(Oid), + /// A composite type. + Composite(Oid), } /// Information about a field of a composite type. 
diff --git a/libs/proxy/tokio-postgres2/src/client.rs b/libs/proxy/tokio-postgres2/src/client.rs index 08a06163e1..186eb07000 100644 --- a/libs/proxy/tokio-postgres2/src/client.rs +++ b/libs/proxy/tokio-postgres2/src/client.rs @@ -19,10 +19,10 @@ use crate::config::{Host, SslMode}; use crate::connection::{Request, RequestMessages}; use crate::query::RowStream; use crate::simple_query::SimpleQueryStream; -use crate::types::{Oid, ToSql, Type}; +use crate::types::{Oid, Type}; use crate::{ - CancelToken, Error, ReadyForQueryStatus, Row, SimpleQueryMessage, Statement, Transaction, - TransactionBuilder, query, simple_query, slice_iter, + CancelToken, Error, ReadyForQueryStatus, SimpleQueryMessage, Statement, Transaction, + TransactionBuilder, query, simple_query, }; pub struct Responses { @@ -54,26 +54,18 @@ impl Responses { /// A cache of type info and prepared statements for fetching type info /// (corresponding to the queries in the [crate::prepare] module). #[derive(Default)] -struct CachedTypeInfo { +pub(crate) struct CachedTypeInfo { /// A statement for basic information for a type from its /// OID. Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_QUERY) (or its /// fallback). - typeinfo: Option, - /// A statement for getting information for a composite type from its OID. - /// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY). - typeinfo_composite: Option, - /// A statement for getting information for a composite type from its OID. - /// Corresponds to [TYPEINFO_QUERY](crate::prepare::TYPEINFO_COMPOSITE_QUERY) (or - /// its fallback). - typeinfo_enum: Option, + pub(crate) typeinfo: Option, /// Cache of types already looked up. - types: HashMap, + pub(crate) types: HashMap, } pub struct InnerClient { sender: mpsc::UnboundedSender, - cached_typeinfo: Mutex, /// A buffer to use when writing out postgres commands. 
buffer: Mutex, @@ -91,38 +83,6 @@ impl InnerClient { }) } - pub fn typeinfo(&self) -> Option { - self.cached_typeinfo.lock().typeinfo.clone() - } - - pub fn set_typeinfo(&self, statement: &Statement) { - self.cached_typeinfo.lock().typeinfo = Some(statement.clone()); - } - - pub fn typeinfo_composite(&self) -> Option { - self.cached_typeinfo.lock().typeinfo_composite.clone() - } - - pub fn set_typeinfo_composite(&self, statement: &Statement) { - self.cached_typeinfo.lock().typeinfo_composite = Some(statement.clone()); - } - - pub fn typeinfo_enum(&self) -> Option { - self.cached_typeinfo.lock().typeinfo_enum.clone() - } - - pub fn set_typeinfo_enum(&self, statement: &Statement) { - self.cached_typeinfo.lock().typeinfo_enum = Some(statement.clone()); - } - - pub fn type_(&self, oid: Oid) -> Option { - self.cached_typeinfo.lock().types.get(&oid).cloned() - } - - pub fn set_type(&self, oid: Oid, type_: &Type) { - self.cached_typeinfo.lock().types.insert(oid, type_.clone()); - } - /// Call the given function with a buffer to be used when writing out /// postgres commands. pub fn with_buf(&self, f: F) -> R @@ -142,7 +102,6 @@ pub struct SocketConfig { pub host: Host, pub port: u16, pub connect_timeout: Option, - // pub keepalive: Option, } /// An asynchronous PostgreSQL client. @@ -151,6 +110,7 @@ pub struct SocketConfig { /// through this client object. pub struct Client { inner: Arc, + cached_typeinfo: CachedTypeInfo, socket_config: SocketConfig, ssl_mode: SslMode, @@ -169,9 +129,9 @@ impl Client { Client { inner: Arc::new(InnerClient { sender, - cached_typeinfo: Default::default(), buffer: Default::default(), }), + cached_typeinfo: Default::default(), socket_config, ssl_mode, @@ -189,55 +149,6 @@ impl Client { &self.inner } - /// Executes a statement, returning a vector of the resulting rows. - /// - /// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list - /// provided, 1-indexed. 
- /// - /// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be - /// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front - /// with the `prepare` method. - /// - /// # Panics - /// - /// Panics if the number of parameters provided does not match the number expected. - pub async fn query( - &self, - statement: Statement, - params: &[&(dyn ToSql + Sync)], - ) -> Result, Error> { - self.query_raw(statement, slice_iter(params)) - .await? - .try_collect() - .await - } - - /// The maximally flexible version of [`query`]. - /// - /// A statement may contain parameters, specified by `$n`, where `n` is the index of the parameter of the list - /// provided, 1-indexed. - /// - /// The `statement` argument can either be a `Statement`, or a raw query string. If the same statement will be - /// repeatedly executed (perhaps with different query parameters), consider preparing the statement up front - /// with the `prepare` method. - /// - /// # Panics - /// - /// Panics if the number of parameters provided does not match the number expected. 
- /// - /// [`query`]: #method.query - pub async fn query_raw<'a, I>( - &self, - statement: Statement, - params: I, - ) -> Result - where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, - { - query::query(&self.inner, statement, params).await - } - /// Pass text directly to the Postgres backend to allow it to sort out typing itself and /// to save a roundtrip pub async fn query_raw_txt(&self, statement: &str, params: I) -> Result @@ -284,14 +195,10 @@ impl Client { simple_query::batch_execute(self.inner(), query).await } - pub async fn discard_all(&self) -> Result { + pub async fn discard_all(&mut self) -> Result { // clear the prepared statements that are about to be nuked from the postgres session - { - let mut typeinfo = self.inner.cached_typeinfo.lock(); - typeinfo.typeinfo = None; - typeinfo.typeinfo_composite = None; - typeinfo.typeinfo_enum = None; - } + + self.cached_typeinfo.typeinfo = None; self.batch_execute("discard all").await } @@ -359,8 +266,8 @@ impl Client { } /// Query for type information - pub async fn get_type(&self, oid: Oid) -> Result { - crate::prepare::get_type(&self.inner, oid).await + pub(crate) async fn get_type_inner(&mut self, oid: Oid) -> Result { + crate::prepare::get_type(&self.inner, &mut self.cached_typeinfo, oid).await } /// Determines if the connection to the server has already closed. 
diff --git a/libs/proxy/tokio-postgres2/src/generic_client.rs b/libs/proxy/tokio-postgres2/src/generic_client.rs index 31c3d8fa3e..8e28843347 100644 --- a/libs/proxy/tokio-postgres2/src/generic_client.rs +++ b/libs/proxy/tokio-postgres2/src/generic_client.rs @@ -22,7 +22,7 @@ pub trait GenericClient: private::Sealed { I::IntoIter: ExactSizeIterator + Sync + Send; /// Query for type information - async fn get_type(&self, oid: Oid) -> Result; + async fn get_type(&mut self, oid: Oid) -> Result; } impl private::Sealed for Client {} @@ -38,8 +38,8 @@ impl GenericClient for Client { } /// Query for type information - async fn get_type(&self, oid: Oid) -> Result { - crate::prepare::get_type(self.inner(), oid).await + async fn get_type(&mut self, oid: Oid) -> Result { + self.get_type_inner(oid).await } } @@ -56,7 +56,7 @@ impl GenericClient for Transaction<'_> { } /// Query for type information - async fn get_type(&self, oid: Oid) -> Result { - self.client().get_type(oid).await + async fn get_type(&mut self, oid: Oid) -> Result { + self.client_mut().get_type(oid).await } } diff --git a/libs/proxy/tokio-postgres2/src/prepare.rs b/libs/proxy/tokio-postgres2/src/prepare.rs index b36d2e5f74..ba13a528f6 100644 --- a/libs/proxy/tokio-postgres2/src/prepare.rs +++ b/libs/proxy/tokio-postgres2/src/prepare.rs @@ -9,10 +9,10 @@ use log::debug; use postgres_protocol2::message::backend::Message; use postgres_protocol2::message::frontend; -use crate::client::InnerClient; +use crate::client::{CachedTypeInfo, InnerClient}; use crate::codec::FrontendMessage; use crate::connection::RequestMessages; -use crate::types::{Field, Kind, Oid, Type}; +use crate::types::{Kind, Oid, Type}; use crate::{Column, Error, Statement, query, slice_iter}; pub(crate) const TYPEINFO_QUERY: &str = "\ @@ -23,23 +23,7 @@ INNER JOIN pg_catalog.pg_namespace n ON t.typnamespace = n.oid WHERE t.oid = $1 "; -const TYPEINFO_ENUM_QUERY: &str = "\ -SELECT enumlabel -FROM pg_catalog.pg_enum -WHERE enumtypid = $1 -ORDER BY 
enumsortorder -"; - -pub(crate) const TYPEINFO_COMPOSITE_QUERY: &str = "\ -SELECT attname, atttypid -FROM pg_catalog.pg_attribute -WHERE attrelid = $1 -AND NOT attisdropped -AND attnum > 0 -ORDER BY attnum -"; - -pub async fn prepare( +async fn prepare_typecheck( client: &Arc, name: &'static str, query: &str, @@ -67,7 +51,7 @@ pub async fn prepare( let mut parameters = vec![]; let mut it = parameter_description.parameters(); while let Some(oid) = it.next().map_err(Error::parse)? { - let type_ = get_type(client, oid).await?; + let type_ = Type::from_oid(oid).ok_or_else(Error::unexpected_message)?; parameters.push(type_); } @@ -75,7 +59,7 @@ pub async fn prepare( if let Some(row_description) = row_description { let mut it = row_description.fields(); while let Some(field) = it.next().map_err(Error::parse)? { - let type_ = get_type(client, field.type_oid()).await?; + let type_ = Type::from_oid(field.type_oid()).ok_or_else(Error::unexpected_message)?; let column = Column::new(field.name().to_string(), type_, field); columns.push(column); } @@ -84,15 +68,6 @@ pub async fn prepare( Ok(Statement::new(client, name, parameters, columns)) } -fn prepare_rec<'a>( - client: &'a Arc, - name: &'static str, - query: &'a str, - types: &'a [Type], -) -> Pin> + 'a + Send>> { - Box::pin(prepare(client, name, query, types)) -} - fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Result { if types.is_empty() { debug!("preparing query {}: {}", name, query); @@ -108,16 +83,20 @@ fn encode(client: &InnerClient, name: &str, query: &str, types: &[Type]) -> Resu }) } -pub async fn get_type(client: &Arc, oid: Oid) -> Result { +pub async fn get_type( + client: &Arc, + typecache: &mut CachedTypeInfo, + oid: Oid, +) -> Result { if let Some(type_) = Type::from_oid(oid) { return Ok(type_); } - if let Some(type_) = client.type_(oid) { - return Ok(type_); - } + if let Some(type_) = typecache.types.get(&oid) { + return Ok(type_.clone()); + }; - let stmt = 
typeinfo_statement(client).await?; + let stmt = typeinfo_statement(client, typecache).await?; let rows = query::query(client, stmt, slice_iter(&[&oid])).await?; pin_mut!(rows); @@ -136,100 +115,48 @@ pub async fn get_type(client: &Arc, oid: Oid) -> Result( client: &'a Arc, + typecache: &'a mut CachedTypeInfo, oid: Oid, ) -> Pin> + Send + 'a>> { - Box::pin(get_type(client, oid)) + Box::pin(get_type(client, typecache, oid)) } -async fn typeinfo_statement(client: &Arc) -> Result { - if let Some(stmt) = client.typeinfo() { - return Ok(stmt); +async fn typeinfo_statement( + client: &Arc, + typecache: &mut CachedTypeInfo, +) -> Result { + if let Some(stmt) = &typecache.typeinfo { + return Ok(stmt.clone()); } let typeinfo = "neon_proxy_typeinfo"; - let stmt = prepare_rec(client, typeinfo, TYPEINFO_QUERY, &[]).await?; + let stmt = prepare_typecheck(client, typeinfo, TYPEINFO_QUERY, &[]).await?; - client.set_typeinfo(&stmt); - Ok(stmt) -} - -async fn get_enum_variants(client: &Arc, oid: Oid) -> Result, Error> { - let stmt = typeinfo_enum_statement(client).await?; - - query::query(client, stmt, slice_iter(&[&oid])) - .await? - .and_then(|row| async move { row.try_get(0) }) - .try_collect() - .await -} - -async fn typeinfo_enum_statement(client: &Arc) -> Result { - if let Some(stmt) = client.typeinfo_enum() { - return Ok(stmt); - } - - let typeinfo = "neon_proxy_typeinfo_enum"; - let stmt = prepare_rec(client, typeinfo, TYPEINFO_ENUM_QUERY, &[]).await?; - - client.set_typeinfo_enum(&stmt); - Ok(stmt) -} - -async fn get_composite_fields(client: &Arc, oid: Oid) -> Result, Error> { - let stmt = typeinfo_composite_statement(client).await?; - - let rows = query::query(client, stmt, slice_iter(&[&oid])) - .await? 
- .try_collect::>() - .await?; - - let mut fields = vec![]; - for row in rows { - let name = row.try_get(0)?; - let oid = row.try_get(1)?; - let type_ = get_type_rec(client, oid).await?; - fields.push(Field::new(name, type_)); - } - - Ok(fields) -} - -async fn typeinfo_composite_statement(client: &Arc) -> Result { - if let Some(stmt) = client.typeinfo_composite() { - return Ok(stmt); - } - - let typeinfo = "neon_proxy_typeinfo_composite"; - let stmt = prepare_rec(client, typeinfo, TYPEINFO_COMPOSITE_QUERY, &[]).await?; - - client.set_typeinfo_composite(&stmt); + typecache.typeinfo = Some(stmt.clone()); Ok(stmt) } diff --git a/libs/proxy/tokio-postgres2/src/transaction.rs b/libs/proxy/tokio-postgres2/src/transaction.rs index eecbfc5873..f32603470f 100644 --- a/libs/proxy/tokio-postgres2/src/transaction.rs +++ b/libs/proxy/tokio-postgres2/src/transaction.rs @@ -72,4 +72,9 @@ impl<'a> Transaction<'a> { pub fn client(&self) -> &Client { self.client } + + /// Returns a mutable reference to the underlying `Client`.
+ pub fn client_mut(&mut self) -> &mut Client { + self.client + } } diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 612702231f..47009086c3 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -860,7 +860,13 @@ impl QueryData { let cancel_token = inner.cancel_token(); let res = match select( - pin!(query_to_json(config, &*inner, self, &mut 0, parsed_headers)), + pin!(query_to_json( + config, + &mut *inner, + self, + &mut 0, + parsed_headers + )), pin!(cancel.cancelled()), ) .await @@ -944,7 +950,7 @@ impl BatchQueryData { builder = builder.deferrable(true); } - let transaction = builder + let mut transaction = builder .start() .await .inspect_err(|_| { @@ -957,7 +963,7 @@ impl BatchQueryData { let json_output = match query_batch( config, cancel.child_token(), - &transaction, + &mut transaction, self, parsed_headers, ) @@ -1009,7 +1015,7 @@ impl BatchQueryData { async fn query_batch( config: &'static HttpConfig, cancel: CancellationToken, - transaction: &Transaction<'_>, + transaction: &mut Transaction<'_>, queries: BatchQueryData, parsed_headers: HttpHeaders, ) -> Result { @@ -1047,7 +1053,7 @@ async fn query_batch( async fn query_to_json( config: &'static HttpConfig, - client: &T, + client: &mut T, data: QueryData, current_size: &mut usize, parsed_headers: HttpHeaders, From bb3c0ff251f0925594ef40d16df26bb56770e68b Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Wed, 12 Mar 2025 11:09:02 -0500 Subject: [PATCH 165/207] Make collecting the installed extensions metric async (#11071) If the goal is to make compute_ctl completely asynchronous, then this is one step to getting there. 
Signed-off-by: Tristan Partin --- compute_tools/src/compute.rs | 6 ++-- compute_tools/src/installed_extensions.rs | 35 +++++++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 354528e2cd..c2a3e38ed6 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -645,9 +645,9 @@ impl ComputeNode { if pspec.spec.mode == ComputeMode::Primary { self.configure_as_primary(&compute_state)?; - let conf = self.get_conn_conf(None); - tokio::task::spawn_blocking(|| { - let res = get_installed_extensions(conf); + let conf = self.get_tokio_conn_conf(None); + tokio::task::spawn(async { + let res = get_installed_extensions(conf).await; match res { Ok(extensions) => { info!( diff --git a/compute_tools/src/installed_extensions.rs b/compute_tools/src/installed_extensions.rs index 6921505466..d95c168a99 100644 --- a/compute_tools/src/installed_extensions.rs +++ b/compute_tools/src/installed_extensions.rs @@ -2,7 +2,7 @@ use std::collections::HashMap; use anyhow::Result; use compute_api::responses::{InstalledExtension, InstalledExtensions}; -use postgres::{Client, NoTls}; +use tokio_postgres::{Client, Config, NoTls}; use crate::metrics::INSTALLED_EXTENSIONS; @@ -10,7 +10,7 @@ use crate::metrics::INSTALLED_EXTENSIONS; /// and to make database listing query here more explicit. /// /// Limit the number of databases to 500 to avoid excessive load. -fn list_dbs(client: &mut Client) -> Result> { +async fn list_dbs(client: &mut Client) -> Result> { // `pg_database.datconnlimit = -2` means that the database is in the // invalid state let databases = client @@ -20,7 +20,8 @@ fn list_dbs(client: &mut Client) -> Result> { AND datconnlimit <> - 2 LIMIT 500", &[], - )? + ) + .await? 
.iter() .map(|row| { let db: String = row.get("datname"); @@ -36,20 +37,36 @@ fn list_dbs(client: &mut Client) -> Result> { /// Same extension can be installed in multiple databases with different versions, /// so we report a separate metric (number of databases where it is installed) /// for each extension version. -pub fn get_installed_extensions(mut conf: postgres::config::Config) -> Result { +pub async fn get_installed_extensions(mut conf: Config) -> Result { conf.application_name("compute_ctl:get_installed_extensions"); - let mut client = conf.connect(NoTls)?; - let databases: Vec = list_dbs(&mut client)?; + let databases: Vec = { + let (mut client, connection) = conf.connect(NoTls).await?; + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + list_dbs(&mut client).await? + }; let mut extensions_map: HashMap<(String, String, String), InstalledExtension> = HashMap::new(); for db in databases.iter() { conf.dbname(db); - let mut db_client = conf.connect(NoTls)?; - let extensions: Vec<(String, String, i32)> = db_client + + let (client, connection) = conf.connect(NoTls).await?; + tokio::spawn(async move { + if let Err(e) = connection.await { + eprintln!("connection error: {}", e); + } + }); + + let extensions: Vec<(String, String, i32)> = client .query( "SELECT extname, extversion, extowner::integer FROM pg_catalog.pg_extension", &[], - )? + ) + .await? .iter() .map(|row| { ( From 5eed0e4b94ffcfd686d071aa278f96deebddaad9 Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Wed, 12 Mar 2025 12:31:09 -0500 Subject: [PATCH 166/207] Add docs to performance/test_logical_replication.py on how to run the suite (#10175) These docs are in tandem with what was recently published on the internal docs site. 
Signed-off-by: Tristan Partin --- .../performance/test_logical_replication.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test_runner/performance/test_logical_replication.py b/test_runner/performance/test_logical_replication.py index fdc56cc496..807ed522e1 100644 --- a/test_runner/performance/test_logical_replication.py +++ b/test_runner/performance/test_logical_replication.py @@ -23,6 +23,25 @@ if TYPE_CHECKING: from psycopg2.extensions import connection, cursor +""" +These benchmarks stress test logical replication within Neon. In order to run +them locally, they require setting up some infrastructure. See +https://docs.neon.build/compute/logical_replication_benchmarks.html for how to +do that. After setting that up, run the following shell commands. + +# These are the project IDs setup for the purposes of running these benchmarks +export BENCHMARK_PROJECT_ID_PUB= +export BENCHMARK_PROJECT_ID_SUB= + +# See https://neon.tech/docs/manage/api-keys +export NEON_API_KEY= + +# Fiddling with the --timeout parameter may be required depending on the +# performance of the benchmark +pytest -m remote_cluster 'test_runner/performance/test_logical_replication.py' +""" + + @pytest.mark.timeout(1000) def test_logical_replication(neon_simple_env: NeonEnv, pg_bin: PgBin, vanilla_pg: VanillaPostgres): env = neon_simple_env From 8a5a739af0c66acc061a5a0f1192e1759ce47738 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 12 Mar 2025 16:34:19 -0400 Subject: [PATCH 167/207] test(pageserver): add small tenant compaction (#11049) ## Problem close https://github.com/neondatabase/neon/issues/10881 ## Summary of changes Mock a tenant with very small amount of data. 
--------- Signed-off-by: Alex Chi Z --- test_runner/regress/test_compaction.py | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test_runner/regress/test_compaction.py b/test_runner/regress/test_compaction.py index 0df88e14c2..c8cce7a4e7 100644 --- a/test_runner/regress/test_compaction.py +++ b/test_runner/regress/test_compaction.py @@ -524,6 +524,42 @@ def test_pageserver_gc_compaction_trigger(neon_env_builder: NeonEnvBuilder): workload.validate(env.pageserver.id) +def test_pageserver_small_tenant_compaction(neon_env_builder: NeonEnvBuilder): + """ + Create a small tenant that rarely needs compaction and ensure that everything works. + """ + SMOKE_CONF = { + # Run both gc and gc-compaction. + "gc_period": "5s", + "compaction_period": "5s", + # No PiTR interval and small GC horizon + "pitr_interval": "0s", + "gc_horizon": 1024, + "lsn_lease_length": "0s", + } + + env = neon_env_builder.init_start(initial_tenant_conf=SMOKE_CONF) + tenant_id = env.initial_tenant + timeline_id = env.initial_timeline + + ps_http = env.pageserver.http_client() + + workload = Workload(env, tenant_id, timeline_id) + workload.init(env.pageserver.id) + + log.info("Writing initial data ...") + workload.write_rows(10000, env.pageserver.id) + + for _ in range(100): + workload.churn_rows(10, env.pageserver.id, upload=False, ingest=False) + ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=True) + ps_http.timeline_compact(tenant_id, timeline_id) + ps_http.timeline_gc(tenant_id, timeline_id, None) + + log.info("Validating at workload end ...") + workload.validate(env.pageserver.id) + + # Stripe sizes in number of pages. 
TINY_STRIPES = 16 LARGE_STRIPES = 32768 From ef0d4a48a8546625ba9824c86839e71851b9bbdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Wed, 12 Mar 2025 22:00:59 +0100 Subject: [PATCH 168/207] Reuse artifacts from release PRs (#11061) ## Problem When we release our components, we perform builds in the release PR, then test the components, then merge the PR, and then build everything *again*, run tests *again*, and only then start deployments. To speed things up, we want to perform builds and run tests in the PR, and start deployments using the existing artifacts from the release PR. To make that possible, we need to have both CI pipelines running on the same commit hash, which requires fast forwarding release. That only works if we have a commit in the PR that has the current release branch state as an ancestor. ## Summary of changes - Changes to release PR creation: - Remove templates and automatic bodies for release PRs. The previous template wasn't used anymore, and the automatic body we created in the pipeline didn't contain any useful content anymore after the changes here. - Make it possible to select the source branch. For releases that aren't cut from `main`, like https://github.com/neondatabase/neon/pull/11051, we need a way to trigger the new flow from a different branch. - Determine `release-branch` automatically from the component name instead of passing that as well. - Changes to the merge queue job: - Rename `get-changed-files` to `meta` in preparation of additional data being fetched as part of that job - Fail the merge queue if we're trying to merge into a branch other than main - this is to prevent non-fast-forward merges. - Label PRs to branches other than main as `fast-forward`, to trigger the fast-forward job - Add a fast-forward job that can be triggered with the `fast-forward` label that performs a fast-forward merge. This only happens if the PR has `mergeable_state == clean`, so CI having passed.
- Build and Test on releases now skips building images, skips testing images and skips triggering e2e tests. We add new tags to the images from the release PR to tag them as release images, and we push them to the prod registries. --- .github/PULL_REQUEST_TEMPLATE/release-pr.md | 21 ---- .github/scripts/generate_image_maps.py | 43 ++++---- .github/scripts/lint-release-pr.sh | 110 ++++++++++++++++++++ .github/workflows/_create-release-pr.yml | 45 +++++--- .github/workflows/_meta.yml | 14 +++ .github/workflows/build_and_test.yml | 80 ++++++-------- .github/workflows/fast-forward.yml | 36 +++++++ .github/workflows/lint-release-pr.yml | 23 ++++ .github/workflows/pre-merge-checks.yml | 47 ++++++--- .github/workflows/release.yml | 6 +- 10 files changed, 300 insertions(+), 125 deletions(-) delete mode 100644 .github/PULL_REQUEST_TEMPLATE/release-pr.md create mode 100755 .github/scripts/lint-release-pr.sh create mode 100644 .github/workflows/fast-forward.yml create mode 100644 .github/workflows/lint-release-pr.yml diff --git a/.github/PULL_REQUEST_TEMPLATE/release-pr.md b/.github/PULL_REQUEST_TEMPLATE/release-pr.md deleted file mode 100644 index 44b3094c24..0000000000 --- a/.github/PULL_REQUEST_TEMPLATE/release-pr.md +++ /dev/null @@ -1,21 +0,0 @@ -## Release 202Y-MM-DD - -**NB: this PR must be merged only by 'Create a merge commit'!** - -### Checklist when preparing for release -- [ ] Read or refresh [the release flow guide](https://www.notion.so/neondatabase/Release-general-flow-61f2e39fd45d4d14a70c7749604bd70b) -- [ ] Ask in the [cloud Slack channel](https://neondb.slack.com/archives/C033A2WE6BZ) that you are going to rollout the release. Any blockers? -- [ ] Does this release contain any db migrations? Destructive ones? What is the rollback plan? - - - -### Checklist after release -- [ ] Make sure instructions from PRs included in this release and labeled `manual_release_instructions` are executed (either by you or by people who wrote them). 
-- [ ] Based on the merged commits write release notes and open a PR into `website` repo ([example](https://github.com/neondatabase/website/pull/219/files)) -- [ ] Check [#dev-production-stream](https://neondb.slack.com/archives/C03F5SM1N02) Slack channel -- [ ] Check [stuck projects page](https://console.neon.tech/admin/projects?sort=last_active&order=desc&stuck=true) -- [ ] Check [recent operation failures](https://console.neon.tech/admin/operations?action=create_timeline%2Cstart_compute%2Cstop_compute%2Csuspend_compute%2Capply_config%2Cdelete_timeline%2Cdelete_tenant%2Ccreate_branch%2Ccheck_availability&sort=updated_at&order=desc&had_retries=some) -- [ ] Check [cloud SLO dashboard](https://neonprod.grafana.net/d/_oWcBMJ7k/cloud-slos?orgId=1) -- [ ] Check [compute startup metrics dashboard](https://neonprod.grafana.net/d/5OkYJEmVz/compute-startup-time) - - diff --git a/.github/scripts/generate_image_maps.py b/.github/scripts/generate_image_maps.py index 39ece5b38f..f67e07024c 100644 --- a/.github/scripts/generate_image_maps.py +++ b/.github/scripts/generate_image_maps.py @@ -1,14 +1,16 @@ import itertools import json import os +import sys -build_tag = os.environ["BUILD_TAG"] -branch = os.environ["BRANCH"] -dev_acr = os.environ["DEV_ACR"] -prod_acr = os.environ["PROD_ACR"] -dev_aws = os.environ["DEV_AWS"] -prod_aws = os.environ["PROD_AWS"] -aws_region = os.environ["AWS_REGION"] +source_tag = os.getenv("SOURCE_TAG") +target_tag = os.getenv("TARGET_TAG") +branch = os.getenv("BRANCH") +dev_acr = os.getenv("DEV_ACR") +prod_acr = os.getenv("PROD_ACR") +dev_aws = os.getenv("DEV_AWS") +prod_aws = os.getenv("PROD_AWS") +aws_region = os.getenv("AWS_REGION") components = { "neon": ["neon"], @@ -39,24 +41,23 @@ registries = { outputs: dict[str, dict[str, list[str]]] = {} -target_tags = [build_tag, "latest"] if branch == "main" else [build_tag] -target_stages = ["dev", "prod"] if branch.startswith("release") else ["dev"] +target_tags = [target_tag, "latest"] if branch == 
"main" else [target_tag] +target_stages = ( + ["dev", "prod"] if branch in ["release", "release-proxy", "release-compute"] else ["dev"] +) for component_name, component_images in components.items(): for stage in target_stages: - outputs[f"{component_name}-{stage}"] = dict( - [ - ( - f"docker.io/neondatabase/{component_image}:{build_tag}", - [ - f"{combo[0]}/{component_image}:{combo[1]}" - for combo in itertools.product(registries[stage], target_tags) - ], - ) - for component_image in component_images + outputs[f"{component_name}-{stage}"] = { + f"docker.io/neondatabase/{component_image}:{source_tag}": [ + f"{registry}/{component_image}:{tag}" + for registry, tag in itertools.product(registries[stage], target_tags) + if not (registry == "docker.io/neondatabase" and tag == source_tag) ] - ) + for component_image in component_images + } -with open(os.environ["GITHUB_OUTPUT"], "a") as f: +with open(os.getenv("GITHUB_OUTPUT", "/dev/null"), "a") as f: for key, value in outputs.items(): f.write(f"{key}={json.dumps(value)}\n") + print(f"Image map for {key}:\n{json.dumps(value, indent=2)}\n\n", file=sys.stderr) diff --git a/.github/scripts/lint-release-pr.sh b/.github/scripts/lint-release-pr.sh new file mode 100755 index 0000000000..8e081000f9 --- /dev/null +++ b/.github/scripts/lint-release-pr.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash + +set -euo pipefail + +DOCS_URL="https://docs.neon.build/overview/repositories/neon.html" + +message() { + if [[ -n "${GITHUB_PR_NUMBER:-}" ]]; then + gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --edit-last --body "$1" \ + || gh pr comment --repo "${GITHUB_REPOSITORY}" "${GITHUB_PR_NUMBER}" --body "$1" + fi + echo "$1" +} + +report_error() { + message "❌ $1 + For more details, see the documentation: ${DOCS_URL}" + + exit 1 +} + +case "$RELEASE_BRANCH" in + "release") COMPONENT="Storage" ;; + "release-proxy") COMPONENT="Proxy" ;; + "release-compute") COMPONENT="Compute" ;; + *) + report_error "Unknown release branch: 
${RELEASE_BRANCH}" + ;; +esac + + +# Identify main and release branches +MAIN_BRANCH="origin/main" +REMOTE_RELEASE_BRANCH="origin/${RELEASE_BRANCH}" + +# Find merge base +MERGE_BASE=$(git merge-base "${MAIN_BRANCH}" "${REMOTE_RELEASE_BRANCH}") +echo "Merge base of ${MAIN_BRANCH} and ${RELEASE_BRANCH}: ${MERGE_BASE}" + +# Get the HEAD commit (last commit in PR, expected to be the merge commit) +LAST_COMMIT=$(git rev-parse HEAD) + +MERGE_COMMIT_MESSAGE=$(git log -1 --format=%s "${LAST_COMMIT}") +EXPECTED_MESSAGE_REGEX="^$COMPONENT release [0-9]{4}-[0-9]{2}-[0-9]{2}$" + +if ! [[ "${MERGE_COMMIT_MESSAGE}" =~ ${EXPECTED_MESSAGE_REGEX} ]]; then + report_error "Merge commit message does not match expected pattern: ' release YYYY-MM-DD' + Expected component: ${COMPONENT} + Found: '${MERGE_COMMIT_MESSAGE}'" +fi +echo "✅ Merge commit message is correctly formatted: '${MERGE_COMMIT_MESSAGE}'" + +LAST_COMMIT_PARENTS=$(git cat-file -p "${LAST_COMMIT}" | jq -sR '[capture("parent (?[0-9a-f]{40})"; "g") | .parent]') + +if [[ "$(echo "${LAST_COMMIT_PARENTS}" | jq 'length')" -ne 2 ]]; then + report_error "Last commit must be a merge commit with exactly two parents" +fi + +EXPECTED_RELEASE_HEAD=$(git rev-parse "${REMOTE_RELEASE_BRANCH}") +if echo "${LAST_COMMIT_PARENTS}" | jq -e --arg rel "${EXPECTED_RELEASE_HEAD}" 'index($rel) != null' > /dev/null; then + LINEAR_HEAD=$(echo "${LAST_COMMIT_PARENTS}" | jq -r '[.[] | select(. 
!= $rel)][0]' --arg rel "${EXPECTED_RELEASE_HEAD}") +else + report_error "Last commit must merge the release branch (${RELEASE_BRANCH})" +fi +echo "✅ Last commit correctly merges the previous commit and the release branch" +echo "Top commit of linear history: ${LINEAR_HEAD}" + +MERGE_COMMIT_TREE=$(git rev-parse "${LAST_COMMIT}^{tree}") +LINEAR_HEAD_TREE=$(git rev-parse "${LINEAR_HEAD}^{tree}") + +if [[ "${MERGE_COMMIT_TREE}" != "${LINEAR_HEAD_TREE}" ]]; then + report_error "Tree of merge commit (${MERGE_COMMIT_TREE}) does not match tree of linear history head (${LINEAR_HEAD_TREE}) + This indicates that the merge of ${RELEASE_BRANCH} into this branch was not performed using the merge strategy 'ours'" +fi +echo "✅ Merge commit tree matches the linear history head" + +EXPECTED_PREVIOUS_COMMIT="${LINEAR_HEAD}" + +# Now traverse down the history, ensuring each commit has exactly one parent +CURRENT_COMMIT="${EXPECTED_PREVIOUS_COMMIT}" +while [[ "${CURRENT_COMMIT}" != "${MERGE_BASE}" && "${CURRENT_COMMIT}" != "${EXPECTED_RELEASE_HEAD}" ]]; do + CURRENT_COMMIT_PARENTS=$(git cat-file -p "${CURRENT_COMMIT}" | jq -sR '[capture("parent (?[0-9a-f]{40})"; "g") | .parent]') + + if [[ "$(echo "${CURRENT_COMMIT_PARENTS}" | jq 'length')" -ne 1 ]]; then + report_error "Commit ${CURRENT_COMMIT} must have exactly one parent" + fi + + NEXT_COMMIT=$(echo "${CURRENT_COMMIT_PARENTS}" | jq -r '.[0]') + + if [[ "${NEXT_COMMIT}" == "${MERGE_BASE}" ]]; then + echo "✅ Reached merge base (${MERGE_BASE})" + PR_BASE="${MERGE_BASE}" + elif [[ "${NEXT_COMMIT}" == "${EXPECTED_RELEASE_HEAD}" ]]; then + echo "✅ Reached release branch (${EXPECTED_RELEASE_HEAD})" + PR_BASE="${EXPECTED_RELEASE_HEAD}" + elif [[ -z "${NEXT_COMMIT}" ]]; then + report_error "Unexpected end of commit history before reaching merge base" + fi + + # Move to the next commit in the chain + CURRENT_COMMIT="${NEXT_COMMIT}" +done + +echo "✅ All commits are properly ordered and linear" +echo "✅ Release PR structure is valid" + +echo +
+message "Commits that are part of this release: +$(git log --oneline "${PR_BASE}..${LINEAR_HEAD}")" diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml index 3c130c8229..82acbc0f84 100644 --- a/.github/workflows/_create-release-pr.yml +++ b/.github/workflows/_create-release-pr.yml @@ -7,8 +7,8 @@ on: description: 'Component name' required: true type: string - release-branch: - description: 'Release branch' + source-branch: + description: 'Source branch' required: true type: string secrets: @@ -30,17 +30,24 @@ jobs: steps: - uses: actions/checkout@v4 with: - ref: main + ref: ${{ inputs.source-branch }} - name: Set variables id: vars env: COMPONENT_NAME: ${{ inputs.component-name }} - RELEASE_BRANCH: ${{ inputs.release-branch }} + RELEASE_BRANCH: >- + ${{ + false + || inputs.component-name == 'Storage' && 'release' + || inputs.component-name == 'Proxy' && 'release-proxy' + || inputs.component-name == 'Compute' && 'release-compute' + }} run: | today=$(date +'%Y-%m-%d') echo "title=${COMPONENT_NAME} release ${today}" | tee -a ${GITHUB_OUTPUT} echo "rc-branch=rc/${RELEASE_BRANCH}/${today}" | tee -a ${GITHUB_OUTPUT} + echo "release-branch=${RELEASE_BRANCH}" | tee -a ${GITHUB_OUTPUT} - name: Configure git run: | @@ -49,31 +56,35 @@ jobs: - name: Create RC branch env: + RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }} RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} TITLE: ${{ steps.vars.outputs.title }} run: | - git checkout -b "${RC_BRANCH}" + git switch -c "${RC_BRANCH}" - # create an empty commit to distinguish workflow runs - # from other possible releases from the same commit - git commit --allow-empty -m "${TITLE}" + # Manually create a merge commit on the current branch, keeping the + # tree and setting the parents to the current HEAD and the HEAD of the + # release branch. This commit is what we'll fast-forward the release + # branch to when merging the release branch. 
+ # For details on why, look at + # https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs + current_tree=$(git rev-parse 'HEAD^{tree}') + release_head=$(git rev-parse "${RELEASE_BRANCH}") + current_head=$(git rev-parse HEAD) + merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}") + + # Fast-forward the current branch to the newly created merge_commit + git merge --ff-only ${merge_commit} git push origin "${RC_BRANCH}" - - name: Create a PR into ${{ inputs.release-branch }} + - name: Create a PR into ${{ steps.vars.outputs.release-branch }} env: GH_TOKEN: ${{ secrets.ci-access-token }} RC_BRANCH: ${{ steps.vars.outputs.rc-branch }} - RELEASE_BRANCH: ${{ inputs.release-branch }} + RELEASE_BRANCH: ${{ steps.vars.outputs.release-branch }} TITLE: ${{ steps.vars.outputs.title }} run: | - cat << EOF > body.md - ## ${TITLE} - - **Please merge this Pull Request using 'Create a merge commit' button** - EOF - gh pr create --title "${TITLE}" \ - --body-file "body.md" \ --head "${RC_BRANCH}" \ --base "${RELEASE_BRANCH}" diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index cae7fae6a4..c9e7b66efa 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -21,6 +21,9 @@ on: run-kind: description: "The kind of run we're currently in. Will be one of `push-main`, `storage-release`, `compute-release`, `proxy-release`, `storage-rc-pr`, `compute-rc-pr`, `proxy-rc-pr`, `pr`, or `workflow-dispatch`" value: ${{ jobs.tags.outputs.run-kind }} + release-pr-run-id: + description: "Only available if `run-kind in [storage-release, proxy-release, compute-release]`. Contains the run ID of the `Build and Test` workflow, assuming one with the current commit can be found." 
+ value: ${{ jobs.tags.outputs.release-pr-run-id }} permissions: {} @@ -37,6 +40,7 @@ jobs: proxy: ${{ steps.previous-releases.outputs.proxy }} storage: ${{ steps.previous-releases.outputs.storage }} run-kind: ${{ steps.run-kind.outputs.run-kind }} + release-pr-run-id: ${{ steps.release-pr-run-id.outputs.release-pr-run-id }} permissions: contents: read steps: @@ -113,3 +117,13 @@ jobs: "/repos/${GITHUB_REPOSITORY}/releases" \ | jq -f .github/scripts/previous-releases.jq -r \ | tee -a "${GITHUB_OUTPUT}" + + - name: Get the release PR run ID + id: release-pr-run-id + if: ${{ contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), steps.run-kind.outputs.run-kind) }} + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} + run: | + RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!"
| halt_error(1))') + echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a "${GITHUB_OUTPUT}" diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1c0971a49d..e1ad972a61 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -476,7 +476,7 @@ jobs: ( !github.event.pull_request.draft || contains( github.event.pull_request.labels.*.name, 'run-e2e-tests-in-draft') - || contains(fromJSON('["push-main", "storage-release", "proxy-release", "compute-release"]'), needs.meta.outputs.run-kind) + || needs.meta.outputs.run-kind == 'push-main' ) && !failure() && !cancelled() }} needs: [ check-permissions, push-neon-image-dev, push-compute-image-dev, meta ] @@ -487,7 +487,7 @@ jobs: neon-image-arch: needs: [ check-permissions, build-build-tools-image, meta ] - if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} strategy: matrix: arch: [ x64, arm64 ] @@ -537,7 +537,7 @@ jobs: neon-image: needs: [ neon-image-arch, meta ] - if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 permissions: id-token: write # aws-actions/configure-aws-credentials @@ -559,7 +559,7 @@ jobs: compute-node-image-arch: needs: [ check-permissions, build-build-tools-image, meta ] - if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -651,7
+651,7 @@ jobs: compute-node-image: needs: [ compute-node-image-arch, meta ] - if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -694,7 +694,7 @@ jobs: vm-compute-node-image-arch: needs: [ check-permissions, meta, compute-node-image ] - if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ${{ fromJson(format('["self-hosted", "{0}"]', matrix.arch == 'arm64' && 'large-arm64' || 'large')) }} strategy: fail-fast: false @@ -747,7 +747,7 @@ jobs: vm-compute-node-image: needs: [ vm-compute-node-image-arch, meta ] - if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} runs-on: ubuntu-22.04 strategy: matrix: @@ -773,7 +773,12 @@ jobs: test-images: needs: [ check-permissions, meta, neon-image, compute-node-image ] # Depends on jobs that can get skipped - if: "!failure() && !cancelled()" + if: >- + ${{ + !failure() + && !cancelled() + && contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + }} strategy: fail-fast: false matrix: @@ -800,7 +805,7 @@ jobs: # Ensure that we don't have bad versions. 
- name: Verify image versions shell: bash # ensure no set -e for better error messages - if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} run: | pageserver_version=$(docker run --rm neondatabase/neon:${{ needs.meta.outputs.build-tag }} "/bin/sh" "-c" "/usr/local/bin/pageserver --version") @@ -821,19 +826,19 @@ jobs: env: TAG: >- ${{ - contains(fromJSON('["compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) + needs.meta.outputs.run-kind == 'compute-rc-pr' && needs.meta.outputs.previous-storage-release || needs.meta.outputs.build-tag }} COMPUTE_TAG: >- ${{ - contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) && needs.meta.outputs.previous-compute-release || needs.meta.outputs.build-tag }} TEST_EXTENSIONS_TAG: >- ${{ - contains(fromJSON('["storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) + contains(fromJSON('["storage-rc-pr", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) && 'latest' || needs.meta.outputs.build-tag }} @@ -885,7 +890,13 @@ jobs: id: generate run: python3 .github/scripts/generate_image_maps.py env: - BUILD_TAG: "${{ needs.meta.outputs.build-tag }}" + SOURCE_TAG: >- + ${{ + contains(fromJson('["storage-release", "compute-release", "proxy-release"]'), needs.meta.outputs.run-kind) + && needs.meta.outputs.release-pr-run-id + || needs.meta.outputs.build-tag + }} + TARGET_TAG: ${{ needs.meta.outputs.build-tag }} BRANCH: "${{ github.ref_name }}" DEV_ACR: "${{ vars.AZURE_DEV_REGISTRY_NAME }}" PROD_ACR: "${{ vars.AZURE_PROD_REGISTRY_NAME }}" @@ -895,7 +906,7 @@ jobs: push-neon-image-dev: needs: [ meta, generate-image-maps, neon-image 
] - if: ${{ contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -913,7 +924,7 @@ jobs: push-compute-image-dev: needs: [ meta, generate-image-maps, vm-compute-node-image ] - if: ${{ contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} + if: ${{ !failure() && !cancelled() && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} uses: ./.github/workflows/_push-to-container-registry.yml permissions: id-token: write # Required for aws/azure login @@ -1235,7 +1246,7 @@ jobs: # The job runs on `release` branch and copies compatibility data and Neon artifact from the last *release PR* to the latest directory promote-compatibility-data: - needs: [ deploy ] + needs: [ meta, deploy ] permissions: id-token: write # aws-actions/configure-aws-credentials statuses: write @@ -1245,37 +1256,6 @@ jobs: runs-on: ubuntu-22.04 steps: - - name: Fetch GITHUB_RUN_ID and COMMIT_SHA for the last merged release PR - id: fetch-last-release-pr-info - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: | - branch_name_and_pr_number=$(gh pr list \ - --repo "${GITHUB_REPOSITORY}" \ - --base release \ - --state merged \ - --limit 10 \ - --json mergeCommit,headRefName,number \ - --jq ".[] | select(.mergeCommit.oid==\"${GITHUB_SHA}\") | { branch_name: .headRefName, pr_number: .number }") - branch_name=$(echo "${branch_name_and_pr_number}" | jq -r '.branch_name') - pr_number=$(echo "${branch_name_and_pr_number}" | jq -r '.pr_number') - - run_id=$(gh run list \ - --repo "${GITHUB_REPOSITORY}" \ - 
--workflow build_and_test.yml \ - --branch "${branch_name}" \ - --json databaseId \ - --limit 1 \ - --jq '.[].databaseId') - - last_commit_sha=$(gh pr view "${pr_number}" \ - --repo "${GITHUB_REPOSITORY}" \ - --json commits \ - --jq '.commits[-1].oid') - - echo "run-id=${run_id}" | tee -a ${GITHUB_OUTPUT} - echo "commit-sha=${last_commit_sha}" | tee -a ${GITHUB_OUTPUT} - - uses: aws-actions/configure-aws-credentials@v4 with: aws-region: eu-central-1 @@ -1286,8 +1266,8 @@ jobs: env: BUCKET: neon-github-public-dev AWS_REGION: eu-central-1 - COMMIT_SHA: ${{ steps.fetch-last-release-pr-info.outputs.commit-sha }} - RUN_ID: ${{ steps.fetch-last-release-pr-info.outputs.run-id }} + COMMIT_SHA: ${{ github.sha }} + RUN_ID: ${{ needs.meta.outputs.release-pr-run-id }} run: | old_prefix="artifacts/${COMMIT_SHA}/${RUN_ID}" new_prefix="artifacts/latest" @@ -1376,5 +1356,5 @@ jobs: || needs.files-changed.result == 'skipped' || (needs.push-compute-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) || (needs.push-neon-image-dev.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-release", "storage-rc-pr", "proxy-release", "proxy-rc-pr"]'), needs.meta.outputs.run-kind)) - || needs.test-images.result == 'skipped' + || (needs.test-images.result == 'skipped' && contains(fromJSON('["push-main", "pr", "storage-rc-pr", "proxy-rc-pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) || (needs.trigger-custom-extensions-build-and-wait.result == 'skipped' && contains(fromJSON('["push-main", "pr", "compute-release", "compute-rc-pr"]'), needs.meta.outputs.run-kind)) diff --git a/.github/workflows/fast-forward.yml b/.github/workflows/fast-forward.yml new file mode 100644 index 0000000000..bc63ff120d --- /dev/null +++ b/.github/workflows/fast-forward.yml @@ -0,0 +1,36 @@ +name: Fast forward merge +on: + pull_request: + types: [labeled] + branches: + - release + - release-proxy + - 
release-compute + +jobs: + fast-forward: + if: ${{ github.event.label.name == 'fast-forward' }} + runs-on: ubuntu-22.04 + + steps: + - name: Remove fast-forward label to PR + env: + GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} + run: | + gh pr edit ${{ github.event.pull_request.number }} --repo "${GITHUB_REPOSITORY}" --remove-label "fast-forward" + + - name: Fast forwarding + uses: sequoia-pgp/fast-forward@ea7628bedcb0b0b96e94383ada458d812fca4979 + # See https://docs.github.com/en/graphql/reference/enums#mergestatestatus + if: ${{ github.event.pull_request.mergeable_state == 'clean' }} + with: + merge: true + comment: on-error + github_token: ${{ secrets.CI_ACCESS_TOKEN }} + + - name: Comment if mergeable_state is not clean + if: ${{ github.event.pull_request.mergeable_state != 'clean' }} + run: | + gh pr comment ${{ github.event.pull_request.number }} \ + --repo "${GITHUB_REPOSITORY}" \ + --body "Not trying to forward pull-request, because \`mergeable_state\` is \`${{ github.event.pull_request.mergeable_state }}\`, not \`clean\`." 
diff --git a/.github/workflows/lint-release-pr.yml b/.github/workflows/lint-release-pr.yml new file mode 100644 index 0000000000..f12ddfe377 --- /dev/null +++ b/.github/workflows/lint-release-pr.yml @@ -0,0 +1,23 @@ +name: Lint Release PR + +on: + pull_request: + branches: + - release + - release-proxy + - release-compute + +jobs: + lint-release-pr: + runs-on: ubuntu-22.04 + steps: + - name: Checkout PR branch + uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch full history for git operations + + - name: Run lint script + env: + RELEASE_BRANCH: ${{ github.base_ref }} + run: | + ./.github/scripts/lint-release-pr.sh diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index c47b3fe0de..1e81550314 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -8,8 +8,6 @@ on: - .github/workflows/build-build-tools-image.yml - .github/workflows/pre-merge-checks.yml merge_group: - branches: - - main defaults: run: @@ -19,11 +17,13 @@ defaults: permissions: {} jobs: - get-changed-files: + meta: runs-on: ubuntu-22.04 outputs: python-changed: ${{ steps.python-src.outputs.any_changed }} rust-changed: ${{ steps.rust-src.outputs.any_changed }} + branch: ${{ steps.group-metadata.outputs.branch }} + pr-number: ${{ steps.group-metadata.outputs.pr-number }} steps: - uses: actions/checkout@v4 @@ -58,12 +58,20 @@ jobs: echo "${PYTHON_CHANGED_FILES}" echo "${RUST_CHANGED_FILES}" + - name: Merge group metadata + if: ${{ github.event_name == 'merge_group' }} + id: group-metadata + env: + MERGE_QUEUE_REF: ${{ github.event.merge_group.head_ref }} + run: | + echo "${MERGE_QUEUE_REF}" | jq -Rr 'capture("refs/heads/gh-readonly-queue/(?<branch>.*)/pr-(?<pr_number>[0-9]+)-[0-9a-f]{40}") | ["branch=" + .branch, "pr-number=" + .pr_number] | .[]' | tee -a "${GITHUB_OUTPUT}" + build-build-tools-image: if: | false - || needs.get-changed-files.outputs.python-changed == 'true' - || needs.get-changed-files.outputs.rust-changed == 'true' -
needs: [ get-changed-files ] + || needs.meta.outputs.python-changed == 'true' + || needs.meta.outputs.rust-changed == 'true' + needs: [ meta ] uses: ./.github/workflows/build-build-tools-image.yml with: # Build only one combination to save time @@ -72,8 +80,8 @@ jobs: secrets: inherit check-codestyle-python: - if: needs.get-changed-files.outputs.python-changed == 'true' - needs: [ get-changed-files, build-build-tools-image ] + if: needs.meta.outputs.python-changed == 'true' + needs: [ meta, build-build-tools-image ] uses: ./.github/workflows/_check-codestyle-python.yml with: # `-bookworm-x64` suffix should match the combination in `build-build-tools-image` @@ -81,8 +89,8 @@ jobs: secrets: inherit check-codestyle-rust: - if: needs.get-changed-files.outputs.rust-changed == 'true' - needs: [ get-changed-files, build-build-tools-image ] + if: needs.meta.outputs.rust-changed == 'true' + needs: [ meta, build-build-tools-image ] uses: ./.github/workflows/_check-codestyle-rust.yml with: # `-bookworm-x64` suffix should match the combination in `build-build-tools-image` @@ -101,7 +109,7 @@ jobs: statuses: write # for `github.repos.createCommitStatus(...)` contents: write needs: - - get-changed-files + - meta - check-codestyle-python - check-codestyle-rust runs-on: ubuntu-22.04 @@ -129,7 +137,20 @@ jobs: run: exit 1 if: | false - || (needs.check-codestyle-python.result == 'skipped' && needs.get-changed-files.outputs.python-changed == 'true') - || (needs.check-codestyle-rust.result == 'skipped' && needs.get-changed-files.outputs.rust-changed == 'true') + || (github.event_name == 'merge_group' && needs.meta.outputs.branch != 'main') + || (needs.check-codestyle-python.result == 'skipped' && needs.meta.outputs.python-changed == 'true') + || (needs.check-codestyle-rust.result == 'skipped' && needs.meta.outputs.rust-changed == 'true') || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') + + - name: Add fast-forward label to PR to trigger fast-forward 
+ merge + if: >- + ${{ + always() + && github.event_name == 'merge_group' + && contains(fromJson('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch) + }} + env: + GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} + run: >- + gh pr edit ${{ needs.meta.outputs.pr-number }} --repo "${GITHUB_REPOSITORY}" --add-label "fast-forward" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 919846ce44..a88ddecd0a 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -38,7 +38,7 @@ jobs: uses: ./.github/workflows/_create-release-pr.yml with: component-name: 'Storage' - release-branch: 'release' + source-branch: ${{ github.ref_name }} secrets: ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} @@ -51,7 +51,7 @@ jobs: uses: ./.github/workflows/_create-release-pr.yml with: component-name: 'Proxy' - release-branch: 'release-proxy' + source-branch: ${{ github.ref_name }} secrets: ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} @@ -64,6 +64,6 @@ jobs: uses: ./.github/workflows/_create-release-pr.yml with: component-name: 'Compute' - release-branch: 'release-compute' + source-branch: ${{ github.ref_name }} secrets: ci-access-token: ${{ secrets.CI_ACCESS_TOKEN }} From c3b3b507f76b841f061104ede0f14c837e879bd7 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 12 Mar 2025 18:27:23 -0400 Subject: [PATCH 169/207] feat(pageserver): support detaching behavior v2 (#11158) ## Problem close https://github.com/neondatabase/neon/issues/10310 ## Summary of changes This patch adds a new behavior for the detach_ancestor API: detach with multi-level ancestor and no reparenting. Though we can potentially support multi-level + do reparenting / single-level + no-reparenting in the future, as it's not required for the recovery/snapshot epic, I'd prefer keeping things simple now that we only handle the old one and the new one instead of supporting the full feature matrix.
I only added a test case of successful detaching instead of testing failures. I'd like to make this into staging and add more tests in the future. --------- Signed-off-by: Alex Chi Z --- pageserver/src/http/routes.rs | 6 +- pageserver/src/tenant/metadata.rs | 5 +- pageserver/src/tenant/mgr.rs | 10 +- pageserver/src/tenant/timeline.rs | 17 ++- .../src/tenant/timeline/detach_ancestor.rs | 138 ++++++++++++++++-- test_runner/fixtures/pageserver/http.py | 5 +- .../regress/test_timeline_detach_ancestor.py | 137 ++++++++++++++++- 7 files changed, 294 insertions(+), 24 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index ba5fb521ff..44159aee0a 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -72,6 +72,7 @@ use crate::tenant::remote_timeline_client::{ use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName}; +use crate::tenant::timeline::detach_ancestor::DetachBehavior; use crate::tenant::timeline::offload::{OffloadError, offload_timeline}; use crate::tenant::timeline::{ CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout, @@ -2505,6 +2506,8 @@ async fn timeline_detach_ancestor_handler( let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?; check_permission(&request, Some(tenant_shard_id.tenant_id))?; let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; + let behavior: Option = parse_query_param(&request, "detach_behavior")?; + let behavior = behavior.unwrap_or_default(); let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id); @@ -2554,7 +2557,7 @@ async fn timeline_detach_ancestor_handler( let ctx = &ctx.with_scope_timeline(&timeline); let progress = timeline - .prepare_to_detach_from_ancestor(&tenant, 
options, ctx) + .prepare_to_detach_from_ancestor(&tenant, options, behavior, ctx) .await?; // uncomment to allow early as possible Tenant::drop @@ -2569,6 +2572,7 @@ async fn timeline_detach_ancestor_handler( tenant_shard_id, timeline_id, prepared, + behavior, attempt, ctx, ) diff --git a/pageserver/src/tenant/metadata.rs b/pageserver/src/tenant/metadata.rs index 77f9a3579d..dceae89d1c 100644 --- a/pageserver/src/tenant/metadata.rs +++ b/pageserver/src/tenant/metadata.rs @@ -300,9 +300,8 @@ impl TimelineMetadata { /// Returns true if anything was changed pub fn detach_from_ancestor(&mut self, branchpoint: &(TimelineId, Lsn)) { - if let Some(ancestor) = self.body.ancestor_timeline { - assert_eq!(ancestor, branchpoint.0); - } + // Detaching from ancestor now doesn't always detach directly to the direct ancestor, but we + // ensure the LSN is the same. So we don't check the timeline ID. if self.body.ancestor_lsn != Lsn(0) { assert_eq!(self.body.ancestor_lsn, branchpoint.1); } diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 003f84e640..092bfdf6c1 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1914,6 +1914,7 @@ impl TenantManager { tenant_shard_id: TenantShardId, timeline_id: TimelineId, prepared: PreparedTimelineDetach, + behavior: detach_ancestor::DetachBehavior, mut attempt: detach_ancestor::Attempt, ctx: &RequestContext, ) -> Result, detach_ancestor::Error> { @@ -1957,7 +1958,14 @@ impl TenantManager { .map_err(Error::NotFound)?; let resp = timeline - .detach_from_ancestor_and_reparent(&tenant, prepared, ctx) + .detach_from_ancestor_and_reparent( + &tenant, + prepared, + attempt.ancestor_timeline_id, + attempt.ancestor_lsn, + behavior, + ctx, + ) .await?; let mut slot_guard = slot_guard; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index e01c3dbd4d..61542409f7 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -5388,9 +5388,10 
@@ impl Timeline { self: &Arc, tenant: &crate::tenant::Tenant, options: detach_ancestor::Options, + behavior: detach_ancestor::DetachBehavior, ctx: &RequestContext, ) -> Result { - detach_ancestor::prepare(self, tenant, options, ctx).await + detach_ancestor::prepare(self, tenant, behavior, options, ctx).await } /// Second step of detach from ancestor; detaches the `self` from it's current ancestor and @@ -5406,9 +5407,21 @@ impl Timeline { self: &Arc, tenant: &crate::tenant::Tenant, prepared: detach_ancestor::PreparedTimelineDetach, + ancestor_timeline_id: TimelineId, + ancestor_lsn: Lsn, + behavior: detach_ancestor::DetachBehavior, ctx: &RequestContext, ) -> Result { - detach_ancestor::detach_and_reparent(self, tenant, prepared, ctx).await + detach_ancestor::detach_and_reparent( + self, + tenant, + prepared, + ancestor_timeline_id, + ancestor_lsn, + behavior, + ctx, + ) + .await } /// Final step which unblocks the GC. diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index b08003d04a..c3e4bedc50 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -32,6 +32,9 @@ pub(crate) enum Error { #[error("too many ancestors")] TooManyAncestors, + #[error("ancestor is not empty")] + AncestorNotEmpty, + #[error("shutting down, please retry later")] ShuttingDown, @@ -89,7 +92,9 @@ impl From for ApiError { fn from(value: Error) -> Self { match value { Error::NoAncestor => ApiError::Conflict(value.to_string()), - Error::TooManyAncestors => ApiError::BadRequest(anyhow::anyhow!("{value}")), + Error::TooManyAncestors | Error::AncestorNotEmpty => { + ApiError::BadRequest(anyhow::anyhow!("{value}")) + } Error::ShuttingDown => ApiError::ShuttingDown, Error::Archived(_) => ApiError::BadRequest(anyhow::anyhow!("{value}")), Error::OtherTimelineDetachOngoing(_) | Error::FailedToReparentAll => { @@ -127,13 +132,37 @@ pub(crate) struct PreparedTimelineDetach { 
layers: Vec, } -/// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments. +// TODO: this should be part of PageserverConf because we cannot easily modify cplane arguments. #[derive(Debug)] pub(crate) struct Options { pub(crate) rewrite_concurrency: std::num::NonZeroUsize, pub(crate) copy_concurrency: std::num::NonZeroUsize, } +/// Controls the detach ancestor behavior. +/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point. +/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all. +#[derive(Debug, Clone, Copy, Default)] +pub enum DetachBehavior { + #[default] + NoAncestorAndReparent, + MultiLevelAndNoReparent, +} + +impl std::str::FromStr for DetachBehavior { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match s { + "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent), + "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent), + "v1" => Ok(DetachBehavior::NoAncestorAndReparent), + "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent), + _ => Err("cannot parse detach behavior"), + } + } +} + impl Default for Options { fn default() -> Self { Self { @@ -147,7 +176,8 @@ impl Default for Options { #[derive(Debug)] pub(crate) struct Attempt { pub(crate) timeline_id: TimelineId, - + pub(crate) ancestor_timeline_id: TimelineId, + pub(crate) ancestor_lsn: Lsn, _guard: completion::Completion, gate_entered: Option, } @@ -167,25 +197,30 @@ impl Attempt { pub(super) async fn prepare( detached: &Arc, tenant: &Tenant, + behavior: DetachBehavior, options: Options, ctx: &RequestContext, ) -> Result { use Error::*; - let Some((ancestor, ancestor_lsn)) = detached + let Some((mut ancestor, mut ancestor_lsn)) = detached .ancestor_timeline .as_ref() .map(|tl| 
(tl.clone(), detached.ancestor_lsn)) else { + let ancestor_id; + let ancestor_lsn; let still_in_progress = { let accessor = detached.remote_client.initialized_upload_queue()?; // we are safe to inspect the latest uploaded, because we can only witness this after // restart is complete and ancestor is no more. let latest = accessor.latest_uploaded_index_part(); - if latest.lineage.detached_previous_ancestor().is_none() { + let Some((id, lsn)) = latest.lineage.detached_previous_ancestor() else { return Err(NoAncestor); }; + ancestor_id = id; + ancestor_lsn = lsn; latest .gc_blocking @@ -196,7 +231,8 @@ pub(super) async fn prepare( if still_in_progress { // gc is still blocked, we can still reparent and complete. // we are safe to reparent remaining, because they were locked in in the beginning. - let attempt = continue_with_blocked_gc(detached, tenant).await?; + let attempt = + continue_with_blocked_gc(detached, tenant, ancestor_id, ancestor_lsn).await?; // because the ancestor of detached is already set to none, we have published all // of the layers, so we are still "prepared." @@ -224,13 +260,34 @@ pub(super) async fn prepare( check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?; - if ancestor.ancestor_timeline.is_some() { + if let DetachBehavior::MultiLevelAndNoReparent = behavior { + // If the ancestor has an ancestor, we might be able to fast-path detach it if the current ancestor does not have any data written/used by the detaching timeline. + while let Some(ancestor_of_ancestor) = ancestor.ancestor_timeline.clone() { + if ancestor_lsn != ancestor.ancestor_lsn { + // non-technical requirement; we could flatten still if ancestor LSN does not match but that needs + // us to copy and cut more layers. 
+ return Err(AncestorNotEmpty); + } + // Use the ancestor of the ancestor as the new ancestor (only when the ancestor LSNs are the same) + ancestor_lsn = ancestor.ancestor_lsn; // Get the LSN first before resetting the `ancestor` variable + ancestor = ancestor_of_ancestor; + // TODO: do we still need to check if we don't want to reparent? + check_no_archived_children_of_ancestor(tenant, detached, &ancestor, ancestor_lsn)?; + } + } else if ancestor.ancestor_timeline.is_some() { // non-technical requirement; we could flatten N ancestors just as easily but we chose // not to, at least initially return Err(TooManyAncestors); } - let attempt = start_new_attempt(detached, tenant).await?; + tracing::info!( + "attempt to detach the timeline from the ancestor: {}@{}, behavior={:?}", + ancestor.timeline_id, + ancestor_lsn, + behavior + ); + + let attempt = start_new_attempt(detached, tenant, ancestor.timeline_id, ancestor_lsn).await?; utils::pausable_failpoint!("timeline-detach-ancestor::before_starting_after_locking-pausable"); @@ -450,8 +507,13 @@ pub(super) async fn prepare( Ok(Progress::Prepared(attempt, prepared)) } -async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result { - let attempt = obtain_exclusive_attempt(detached, tenant)?; +async fn start_new_attempt( + detached: &Timeline, + tenant: &Tenant, + ancestor_timeline_id: TimelineId, + ancestor_lsn: Lsn, +) -> Result { + let attempt = obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn)?; // insert the block in the index_part.json, if not already there. let _dont_care = tenant @@ -466,13 +528,23 @@ async fn start_new_attempt(detached: &Timeline, tenant: &Tenant) -> Result Result { +async fn continue_with_blocked_gc( + detached: &Timeline, + tenant: &Tenant, + ancestor_timeline_id: TimelineId, + ancestor_lsn: Lsn, +) -> Result { // FIXME: it would be nice to confirm that there is an in-memory version, since we've just // verified there is a persistent one? 
- obtain_exclusive_attempt(detached, tenant) + obtain_exclusive_attempt(detached, tenant, ancestor_timeline_id, ancestor_lsn) } -fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result { +fn obtain_exclusive_attempt( + detached: &Timeline, + tenant: &Tenant, + ancestor_timeline_id: TimelineId, + ancestor_lsn: Lsn, +) -> Result { use Error::{OtherTimelineDetachOngoing, ShuttingDown}; // ensure we are the only active attempt for this tenant @@ -493,6 +565,8 @@ fn obtain_exclusive_attempt(detached: &Timeline, tenant: &Tenant) -> Result, tenant: &Tenant, prepared: PreparedTimelineDetach, + ancestor_timeline_id: TimelineId, + ancestor_lsn: Lsn, + behavior: DetachBehavior, _ctx: &RequestContext, ) -> Result { let PreparedTimelineDetach { layers } = prepared; @@ -822,7 +899,30 @@ pub(super) async fn detach_and_reparent( "cannot (detach? reparent)? complete if the operation is not still ongoing" ); - let ancestor = match (detached.ancestor_timeline.as_ref(), recorded_branchpoint) { + let ancestor_to_detach = match detached.ancestor_timeline.as_ref() { + Some(mut ancestor) => { + while ancestor.timeline_id != ancestor_timeline_id { + match ancestor.ancestor_timeline.as_ref() { + Some(found) => { + if ancestor_lsn != ancestor.ancestor_lsn { + return Err(Error::DetachReparent(anyhow::anyhow!( + "cannot find the ancestor timeline to detach from: wrong ancestor lsn" + ))); + } + ancestor = found; + } + None => { + return Err(Error::DetachReparent(anyhow::anyhow!( + "cannot find the ancestor timeline to detach from" + ))); + } + } + } + Some(ancestor) + } + None => None, + }; + let ancestor = match (ancestor_to_detach, recorded_branchpoint) { (Some(ancestor), None) => { assert!( !layers.is_empty(), @@ -895,6 +995,11 @@ pub(super) async fn detach_and_reparent( Ancestor::Detached(ancestor, ancestor_lsn) => (ancestor, ancestor_lsn, false), }; + if let DetachBehavior::MultiLevelAndNoReparent = behavior { + // Do not reparent if the user requests to behave so. 
+ return Ok(DetachingAndReparenting::Reparented(HashSet::new())); + } + let mut tasks = tokio::task::JoinSet::new(); // Returns a single permit semaphore which will be used to make one reparenting succeed, @@ -1032,6 +1137,11 @@ pub(super) async fn complete( } /// Query against a locked `Tenant::timelines`. +/// +/// A timeline is reparentable if: +/// +/// - It is not the timeline being detached. +/// - It has the same ancestor as the timeline being detached. Note that the ancestor might not be the direct ancestor. fn reparentable_timelines<'a, I>( timelines: I, detached: &'a Arc, diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 0efe0b9575..61aab2213d 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -1070,11 +1070,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter): tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, batch_size: int | None = None, + behavior_v2: bool = False, **kwargs, ) -> set[TimelineId]: - params = {} + params: dict[str, Any] = {} if batch_size is not None: params["batch_size"] = batch_size + if behavior_v2: + params["detach_behavior"] = "v2" res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor", params=params, diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index 612a767480..79537ba83a 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -319,8 +319,9 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder): # this does not contain Z in the end, so fromisoformat accepts it # it is to be in line with the deletion timestamp.. well, almost. 
when = original_ancestor[2][:26] - when_ts = datetime.datetime.fromisoformat(when) - assert when_ts < datetime.datetime.now() + when_ts = datetime.datetime.fromisoformat(when).replace(tzinfo=datetime.UTC) + now = datetime.datetime.utcnow().replace(tzinfo=datetime.UTC) + assert when_ts < now assert len(lineage.get("reparenting_history", [])) == 0 elif expected_ancestor == timeline_id: assert len(lineage.get("original_ancestor", [])) == 0 @@ -342,6 +343,138 @@ def test_ancestor_detach_reparents_earlier(neon_env_builder: NeonEnvBuilder): wait_timeline_detail_404(client, env.initial_tenant, env.initial_timeline) +def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder): + """ + Test the v2 behavior of ancestor detach. + + old main -------|---------X---------> + | | | + | | +-> after + | +--X empty snapshot branch + | | + | +-> branch-to-detach + | + +-> earlier + + Ends up as: + + old main -------|---------X---------> + | | | + | | +-> after + | +--X empty snapshot branch + | + +-> earlier + + + new main -------|---------|----> branch-to-detach + """ + + env = neon_env_builder.init_start() + + env.pageserver.allowed_errors.extend(SHUTDOWN_ALLOWED_ERRORS) + + client = env.pageserver.http_client() + + with env.endpoints.create_start("main", tenant_id=env.initial_tenant) as ep: + ep.safe_psql("CREATE TABLE foo (i BIGINT);") + ep.safe_psql("CREATE TABLE audit AS SELECT 1 as starts;") + + branchpoint_pipe = wait_for_last_flush_lsn( + env, ep, env.initial_tenant, env.initial_timeline + ) + + ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(0, 8191) g(i);") + + branchpoint_x = wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) + client.timeline_checkpoint(env.initial_tenant, env.initial_timeline) + + ep.safe_psql("INSERT INTO foo SELECT i::bigint FROM generate_series(8192, 16383) g(i);") + wait_for_last_flush_lsn(env, ep, env.initial_tenant, env.initial_timeline) + + earlier = env.create_branch( + "earlier", 
ancestor_branch_name="main", ancestor_start_lsn=branchpoint_pipe + ) + + snapshot_branchpoint = env.create_branch( + "snapshot_branchpoint", ancestor_branch_name="main", ancestor_start_lsn=branchpoint_x + ) + + branch_to_detach = env.create_branch( + "branch_to_detach", + ancestor_branch_name="snapshot_branchpoint", + ancestor_start_lsn=branchpoint_x, + ) + + after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None) + + all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach, behavior_v2=True) + assert set(all_reparented) == set() + + env.pageserver.quiesce_tenants() + + # checking the ancestor after is much faster than waiting for the endpoint not start + expected_result = [ + ("main", env.initial_timeline, None, 16384, 1), + ("after", after, env.initial_timeline, 16384, 1), + ("snapshot_branchpoint", snapshot_branchpoint, env.initial_timeline, 8192, 1), + ("branch_to_detach", branch_to_detach, None, 8192, 1), + ("earlier", earlier, env.initial_timeline, 0, 1), + ] + + assert isinstance(env.pageserver_remote_storage, LocalFsStorage) + + for branch_name, queried_timeline, expected_ancestor, _, _ in expected_result: + details = client.timeline_detail(env.initial_tenant, queried_timeline) + ancestor_timeline_id = details["ancestor_timeline_id"] + if expected_ancestor is None: + assert ancestor_timeline_id is None + else: + assert ( + TimelineId(ancestor_timeline_id) == expected_ancestor + ), f"when checking branch {branch_name}, mapping={expected_result}" + + index_part = env.pageserver_remote_storage.index_content( + env.initial_tenant, queried_timeline + ) + lineage = index_part["lineage"] + assert lineage is not None + + assert lineage.get("reparenting_history_overflown", "false") == "false" + + if queried_timeline == branch_to_detach: + original_ancestor = lineage["original_ancestor"] + assert original_ancestor is not None + assert original_ancestor[0] == str(env.initial_timeline) + assert original_ancestor[1] 
== str(branchpoint_x) + + # this does not contain Z in the end, so fromisoformat accepts it + # it is to be in line with the deletion timestamp.. well, almost. + when = original_ancestor[2][:26] + when_ts = datetime.datetime.fromisoformat(when).replace(tzinfo=datetime.UTC) + now = datetime.datetime.utcnow().replace(tzinfo=datetime.UTC) + assert when_ts < now + assert len(lineage.get("reparenting_history", [])) == 0 + elif expected_ancestor == branch_to_detach: + assert len(lineage.get("original_ancestor", [])) == 0 + assert lineage["reparenting_history"] == [str(env.initial_timeline)] + else: + assert len(lineage.get("original_ancestor", [])) == 0 + assert len(lineage.get("reparenting_history", [])) == 0 + + for name, _, _, rows, starts in expected_result: + with env.endpoints.create_start(name, tenant_id=env.initial_tenant) as ep: + assert ep.safe_psql("SELECT count(*) FROM foo;")[0][0] == rows + assert ep.safe_psql(f"SELECT count(*) FROM audit WHERE starts = {starts}")[0][0] == 1 + + # delete the new timeline to confirm it doesn't carry over the anything from the old timeline + client.timeline_delete(env.initial_tenant, branch_to_detach) + wait_timeline_detail_404(client, env.initial_tenant, branch_to_detach) + + # delete the after timeline + client.timeline_delete(env.initial_tenant, after) + wait_timeline_detail_404(client, env.initial_tenant, after) + + def test_detached_receives_flushes_while_being_detached(neon_env_builder: NeonEnvBuilder): """ Makes sure that the timeline is able to receive writes through-out the detach process. From 48be4df3f3a12c4d6ba049718e35d8a472a8de05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Wed, 12 Mar 2025 23:32:38 +0100 Subject: [PATCH 170/207] fix(ci): fetch all refs in release PR creation (#11201) ## Problem #11061 changed release PR creation, and I missed that we need to explicitly fetch the whole history so that the relevant git refs and objects are available. 
## Summary of changes - Fetch all git refs including history by setting fetch-depth to 0 - Reference release branch as a remote branch, because we haven't checked it out locally --- .github/workflows/_create-release-pr.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml index 82acbc0f84..62266d876e 100644 --- a/.github/workflows/_create-release-pr.yml +++ b/.github/workflows/_create-release-pr.yml @@ -31,6 +31,7 @@ jobs: - uses: actions/checkout@v4 with: ref: ${{ inputs.source-branch }} + fetch-depth: 0 - name: Set variables id: vars @@ -69,7 +70,7 @@ jobs: # For details on why, look at # https://docs.neon.build/overview/repositories/neon.html#background-on-commit-history-of-release-prs current_tree=$(git rev-parse 'HEAD^{tree}') - release_head=$(git rev-parse "${RELEASE_BRANCH}") + release_head=$(git rev-parse "origin/${RELEASE_BRANCH}") current_head=$(git rev-parse HEAD) merge_commit=$(git commit-tree -p "${current_head}" -p "${release_head}" -m "${TITLE}" "${current_tree}") From 507353404c19f14c867db7341c94b06a954d6845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 13 Mar 2025 00:54:43 +0100 Subject: [PATCH 171/207] fix(ci): pass empty body when creating release PRs (#11203) ## Problem #11061 changed release PR creation, and I missed that creating PRs using `gh` in non-interactive environments *requires* `--body` instead of defaulting to an empty body. ## Summary of changes Explicitly set an empty body when creating release PRs.
--- .github/workflows/_create-release-pr.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_create-release-pr.yml b/.github/workflows/_create-release-pr.yml index 62266d876e..9b1d1aa454 100644 --- a/.github/workflows/_create-release-pr.yml +++ b/.github/workflows/_create-release-pr.yml @@ -87,5 +87,6 @@ jobs: TITLE: ${{ steps.vars.outputs.title }} run: | gh pr create --title "${TITLE}" \ + --body "" \ --head "${RC_BRANCH}" \ --base "${RELEASE_BRANCH}" From afc9524bc7b6c6edbe22d98f780260ce90ef0b90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 13 Mar 2025 09:17:33 +0100 Subject: [PATCH 172/207] fix(ci): run lint-release-pr on head-ref (#11206) ## Problem #11061 changed release pr creation, and I missed that the workflow will checkout a would-be-merge of the rc branch and the release branch instead of the head ref, unless explicitly instructed otherwise. ## Summary of changes Check out head ref for linting the release PRs. --- .github/workflows/lint-release-pr.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/lint-release-pr.yml b/.github/workflows/lint-release-pr.yml index f12ddfe377..b7d010f66d 100644 --- a/.github/workflows/lint-release-pr.yml +++ b/.github/workflows/lint-release-pr.yml @@ -15,6 +15,7 @@ jobs: uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch full history for git operations + ref: ${{ github.event.pull_request.head.ref }} - name: Run lint script env: From 803e6f908a31343cd72f33b3019fbcdd986ad1d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 13 Mar 2025 10:42:38 +0100 Subject: [PATCH 173/207] fix(ci): fix syntax of lint-release-pr (#11208) ## Problem A small adjustment in #11061 broke the lint-release-pr.sh script, and the new version was neither tested nor linted. This has been done now, the script is once again tested and passing `shellcheck`. ## Summary of changes Add missing `el` of `elif` condition chain. 
--- .github/scripts/lint-release-pr.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/scripts/lint-release-pr.sh b/.github/scripts/lint-release-pr.sh index 8e081000f9..6dc5b99f0e 100755 --- a/.github/scripts/lint-release-pr.sh +++ b/.github/scripts/lint-release-pr.sh @@ -90,7 +90,7 @@ while [[ "${CURRENT_COMMIT}" != "${MERGE_BASE}" && "${CURRENT_COMMIT}" != "${EXP if [[ "${NEXT_COMMIT}" == "${MERGE_BASE}" ]]; then echo "✅ Reached merge base (${MERGE_BASE})" PR_BASE="${MERGE_BASE}" - if [[ "${NEXT_COMMIT}" == "${EXPECTED_RELEASE_HEAD}" ]]; then + elif [[ "${NEXT_COMMIT}" == "${EXPECTED_RELEASE_HEAD}" ]]; then echo "✅ Reached release branch (${EXPECTED_RELEASE_HEAD})" PR_BASE="${EXPECTED_RELEASE_HEAD}" elif [[ -z "${NEXT_COMMIT}" ]]; then From efb1df4362e313baa2d5969762c69a74d14d4e98 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 13 Mar 2025 10:17:01 +0000 Subject: [PATCH 174/207] =?UTF-8?q?fix:=20Change=20metric=5Funit=20from=20?= =?UTF-8?q?'microseconds'=20to=20'=CE=BCs'=20in=20test=5Fcompute=5Fctl=5Fa?= =?UTF-8?q?pi.py=20(#11209)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Fix metric_unit length in test_compute_ctl_api.py ## Description This PR changes the metric_unit from "microseconds" to "μs" in test_compute_ctl_api.py to fix the issue where perf test results were not being stored in the database due to the string exceeding the 10 character limit of the metric_unit column in the perf_test_results table. ## Problem As reported in Slack, the perf test results were not being uploaded to the database because the "microseconds" string (12 characters) exceeds the 10 character limit of the metric_unit column in the perf_test_results table. ## Solution Replace "microseconds" with "μs" in all metric_unit parameters in the test_compute_ctl_api.py file. ## Testing The changes have been committed and pushed. 
The PR is ready for review. Link to Devin run: https://app.devin.ai/sessions/e29edd672bd34114b059915820e8a853 Requested by: Peter Bendel Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: peterbendel@neon.tech --- test_runner/performance/test_compute_ctl_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test_runner/performance/test_compute_ctl_api.py b/test_runner/performance/test_compute_ctl_api.py index 87eb1f2c35..d6d0a84e8e 100644 --- a/test_runner/performance/test_compute_ctl_api.py +++ b/test_runner/performance/test_compute_ctl_api.py @@ -41,24 +41,24 @@ def test_compute_ctl_api_latencies( zenbenchmark.record( "status_response_latency_p50_us", status_response_latency_us[len(status_response_latency_us) // 2], - "microseconds", + "μs", MetricReport.LOWER_IS_BETTER, ) zenbenchmark.record( "metrics_response_latency_p50_us", metrics_response_latency_us[len(metrics_response_latency_us) // 2], - "microseconds", + "μs", MetricReport.LOWER_IS_BETTER, ) zenbenchmark.record( "status_response_latency_p99_us", status_response_latency_us[len(status_response_latency_us) * 99 // 100], - "microseconds", + "μs", MetricReport.LOWER_IS_BETTER, ) zenbenchmark.record( "metrics_response_latency_p99_us", metrics_response_latency_us[len(metrics_response_latency_us) * 99 // 100], - "microseconds", + "μs", MetricReport.LOWER_IS_BETTER, ) From 5a245a837dcee0c4e8d50ae20f167877a5fa3f08 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 14:28:10 +0100 Subject: [PATCH 175/207] storcon: retain stripe size when autosplitting sharded tenants (#11194) ## Problem Autosplits always request `DEFAULT_STRIPE_SIZE` for splits. However, splits do not allow changing the stripe size of already-sharded tenants, and will error out if it differs. In #11168, we are changing the stripe size, which could hit this when attempting to autosplit already sharded tenants. Touches #11168. 
## Summary of changes Pass `new_stripe_size: None` when autosplitting already sharded tenants. Otherwise, pass `DEFAULT_STRIPE_SIZE` instead of the shard identity's stripe size, since we want to use the current default rather than an old, persisted default. --- storage_controller/src/service.rs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 667b53b725..445b174b96 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -7894,6 +7894,9 @@ impl Service { /// At most one tenant will be split per call: the one with the largest max logical size. It /// will split 1 → 8 shards. /// + /// An unsharded tenant will get DEFAULT_STRIPE_SIZE, regardless of what its ShardIdentity says. + /// A sharded tenant will retain its stripe size, as splits do not allow changing it. + /// /// TODO: consider splitting based on total logical size rather than max logical size. /// /// TODO: consider spawning multiple splits in parallel: this is only called once every 20 @@ -7939,6 +7942,16 @@ impl Service { "Auto-splitting tenant for size threshold {split_threshold}: current size {split_candidate:?}" ); + // Retain the stripe size of sharded tenants, as splits don't allow changing it. Otherwise, + // use DEFAULT_STRIPE_SIZE for unsharded tenants -- their stripe size doesn't really matter, + // and if we change the default stripe size we want to use the new default rather than an + // old, persisted stripe size. + let new_stripe_size = match split_candidate.id.shard_count.count() { + 0 => panic!("invalid shard count 0"), + 1 => Some(ShardParameters::DEFAULT_STRIPE_SIZE), + 2.. => None, + }; + let this = self.clone(); tokio::spawn( async move { @@ -7952,7 +7965,7 @@ impl Service { // because our max shard count is relatively low anyway. This policy // will be adjusted in future once we support higher shard count. 
new_shard_count: MAX_SHARDS.literal(), - new_stripe_size: Some(ShardParameters::DEFAULT_STRIPE_SIZE), + new_stripe_size, }, ) .await From 89c7e4e9171cba712dadddd5ca0203f5176b4109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 13 Mar 2025 14:40:43 +0100 Subject: [PATCH 176/207] fix(ci): use parentheses for error handling in jq when fetching release PRs (#11217) ## Problem #11061 introduced code fetching previous releases. #11151 introduced jq error handling, which has also been applied in #11061, but the parentheses were missed. ## Summary of changes Add parentheses around error handling code. --- .github/workflows/_meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index c9e7b66efa..f029385980 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -125,5 +125,5 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} run: | - RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // "Faied to find Build and Test run from RC PR!" | halt_error(1)') + RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Faied to find Build and Test run from RC PR!"
| halt_error(1))') echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT From c036fec06576d73a2a1dfc2579cee1dc33afee90 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 15:28:42 +0100 Subject: [PATCH 177/207] pageserver: enable `compaction_l0_first` by default (#11212) ## Problem `compaction_l0_first` has already been enabled in production for a couple of weeks. ## Summary of changes Enable `compaction_l0_first` by default. Also set `CompactFlags::NoYield` in `timeline_checkpoint_handler`, to ensure explicitly requested compaction runs to completion. This endpoint is mainly used in tests, and caused some flakiness where tests expected compaction to complete. --- libs/pageserver_api/src/config.rs | 9 ++++++--- pageserver/src/http/routes.rs | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index ce7de1e0c7..6e457823dd 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -272,10 +272,11 @@ pub struct TenantConfigToml { /// size exceeds `compaction_upper_limit * checkpoint_distance`. pub compaction_upper_limit: usize, pub compaction_algorithm: crate::models::CompactionAlgorithmSettings, - /// If true, compact down L0 across all tenant timelines before doing regular compaction. + /// If true, compact down L0 across all tenant timelines before doing regular compaction. L0 + /// compaction must be responsive to avoid read amp during heavy ingestion. Defaults to true. pub compaction_l0_first: bool, /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only - /// has an effect if `compaction_l0_first` is `true`. + /// has an effect if `compaction_l0_first` is true. Defaults to true. 
pub compaction_l0_semaphore: bool, /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure, /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer @@ -567,7 +568,9 @@ pub mod tenant_conf_defaults { // be reduced later by optimizing L0 hole calculation to avoid loading all keys into memory). So // with this config, we can get a maximum peak compaction usage of 9 GB. pub const DEFAULT_COMPACTION_UPPER_LIMIT: usize = 20; - pub const DEFAULT_COMPACTION_L0_FIRST: bool = false; + // Enable L0 compaction pass and semaphore by default. L0 compaction must be responsive to avoid + // read amp. + pub const DEFAULT_COMPACTION_L0_FIRST: bool = true; pub const DEFAULT_COMPACTION_L0_SEMAPHORE: bool = true; pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm = diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 44159aee0a..70c3cc8522 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2392,6 +2392,7 @@ async fn timeline_checkpoint_handler( let state = get_state(&request); let mut flags = EnumSet::empty(); + flags |= CompactFlags::NoYield; // run compaction to completion if Some(true) == parse_query_param::<_, bool>(&request, "force_l0_compaction")? { flags |= CompactFlags::ForceL0Compaction; } From b2286f5bcb1491227665d4fb6f35b229b8052e81 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 13 Mar 2025 10:38:45 -0400 Subject: [PATCH 178/207] fix(pageserver): don't panic if gc-compaction find no keys (#11200) ## Problem There was a panic on staging that compaction didn't find any keys. This is possible if all layers selected for compaction does not contain any keys within the current shard. ## Summary of changes Make panic an error. In the future, we can try creating an empty image layer so that GC can clean up those layers. 
Otherwise, for now, we can only rely on shard ancestor compaction to remove these data. Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index e6f2104e90..300daec9bf 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -3189,7 +3189,11 @@ impl Timeline { } // TODO: move the below part to the loop body - let last_key = last_key.expect("no keys produced during compaction"); + let Some(last_key) = last_key else { + return Err(CompactionError::Other(anyhow!( + "no keys produced during compaction" + ))); + }; stat.on_unique_key_visited(); let retention = self From 3dec1175728210af3b9a5c90d0d4938fafedeb6b Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 13 Mar 2025 15:03:22 +0000 Subject: [PATCH 179/207] feat(compute_ctl): use TLS if configured (#10972) Closes: https://github.com/neondatabase/cloud/issues/22998 If control-plane reports that TLS should be used, load the certificates (and watch for updates), make sure postgres use them, and detects updates. Procedure: 1. Load certificates 2. Reconfigure postgres/pgbouncer 3. Loop on a timer until certificates have loaded 4. Go to 1 Notes: 1. We only run this procedure if requested on startup by control plane. 2. We needed to compile pgbouncer with openssl enabled 3. Postgres doesn't allow tls keys to be globally accessible - must be read only to the postgres user. I couldn't convince the autoscaling team to let me put this logic into the VM settings, so instead compute_ctl will copy the keys to be read-only by postgres. 4. To mitigate a race condition, we also verify that the key matches the cert. 
--- Cargo.lock | 71 +++++++++++++++- Cargo.toml | 2 +- compute/compute-node.Dockerfile | 4 +- compute_tools/Cargo.toml | 7 +- compute_tools/src/compute.rs | 95 ++++++++++++++++++--- compute_tools/src/config.rs | 20 ++++- compute_tools/src/http/server.rs | 8 +- compute_tools/src/lib.rs | 1 + compute_tools/src/pg_helpers.rs | 42 ++++++--- compute_tools/src/tls.rs | 118 ++++++++++++++++++++++++++ libs/compute_api/Cargo.toml | 1 + libs/compute_api/src/responses.rs | 8 ++ libs/compute_api/src/spec.rs | 8 +- proxy/src/binary/local_proxy.rs | 39 +++++++-- proxy/src/binary/proxy.rs | 2 + proxy/src/config.rs | 3 +- proxy/src/console_redirect_proxy.rs | 3 +- proxy/src/proxy/handshake.rs | 2 +- proxy/src/proxy/mod.rs | 3 +- proxy/src/proxy/tests/mod.rs | 8 +- proxy/src/serverless/mod.rs | 38 +++------ proxy/src/serverless/sql_over_http.rs | 4 +- proxy/src/tls/server_config.rs | 20 +++-- workspace_hack/Cargo.toml | 7 +- 24 files changed, 427 insertions(+), 87 deletions(-) create mode 100644 compute_tools/src/tls.rs diff --git a/Cargo.lock b/Cargo.lock index 1721c185f0..898ff1eabb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1309,6 +1309,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "indexmap 2.0.1", "jsonwebtoken", "regex", "remote_storage", @@ -1339,6 +1340,7 @@ dependencies = [ "flate2", "futures", "http 1.1.0", + "indexmap 2.0.1", "jsonwebtoken", "metrics", "nix 0.27.1", @@ -1347,17 +1349,20 @@ dependencies = [ "once_cell", "opentelemetry", "opentelemetry_sdk", + "p256 0.13.2", "postgres", "postgres_initdb", "regex", "remote_storage", "reqwest", + "ring", "rlimit", "rust-ini", "serde", "serde_json", "serde_with", "signal-hook", + "spki 0.7.3", "tar", "thiserror 1.0.69", "tokio", @@ -1377,6 +1382,7 @@ dependencies = [ "vm_monitor", "walkdir", "workspace_hack", + "x509-cert", "zstd", ] @@ -1801,6 +1807,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fffa369a668c8af7dbf8b5e56c9f744fbd399949ed171606040001947de40b1c" dependencies 
= [ "const-oid", + "der_derive", + "flagset", "pem-rfc7468", "zeroize", ] @@ -1819,6 +1827,17 @@ dependencies = [ "rusticata-macros", ] +[[package]] +name = "der_derive" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "deranged" version = "0.3.11" @@ -2282,6 +2301,12 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" +[[package]] +name = "flagset" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" + [[package]] name = "flate2" version = "1.0.26" @@ -6425,9 +6450,9 @@ dependencies = [ [[package]] name = "sha1" -version = "0.10.5" +version = "0.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f04293dc80c3993519f2d7f6f511707ee7094fe0c6d3406feb330cdb3540eba3" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" dependencies = [ "cfg-if", "cpufeatures", @@ -7135,6 +7160,27 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" +[[package]] +name = "tls_codec" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de2e01245e2bb89d6f05801c564fa27624dbd7b1846859876c7dad82e90bf6b" +dependencies = [ + "tls_codec_derive", + "zeroize", +] + +[[package]] +name = "tls_codec_derive" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "tokio" version = 
"1.43.0" @@ -8387,12 +8433,15 @@ dependencies = [ "chrono", "clap", "clap_builder", + "const-oid", "crypto-bigint 0.5.5", "der 0.7.8", "deranged", "digest", "displaydoc", + "ecdsa 0.16.9", "either", + "elliptic-curve 0.13.8", "env_filter", "env_logger", "fail", @@ -8427,6 +8476,7 @@ dependencies = [ "num-rational", "num-traits", "once_cell", + "p256 0.13.2", "parquet", "prettyplease", "proc-macro2", @@ -8439,6 +8489,7 @@ dependencies = [ "reqwest", "rustls 0.23.18", "scopeguard", + "sec1 0.7.3", "serde", "serde_json", "sha2", @@ -8484,6 +8535,18 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "x509-cert" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1301e935010a701ae5f8655edc0ad17c44bad3ac5ce8c39185f75453b720ae94" +dependencies = [ + "const-oid", + "der 0.7.8", + "spki 0.7.3", + "tls_codec", +] + [[package]] name = "x509-certificate" version = "0.23.1" @@ -8612,9 +8675,9 @@ dependencies = [ [[package]] name = "zeroize" -version = "1.7.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" +checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" dependencies = [ "serde", "zeroize_derive", diff --git a/Cargo.toml b/Cargo.toml index 7b86a64e9a..82fb463182 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -112,7 +112,7 @@ hyper0 = { package = "hyper", version = "0.14" } hyper = "1.4" hyper-util = "0.1" tokio-tungstenite = "0.21.0" -indexmap = "2" +indexmap = { version = "2", features = ["serde"] } indoc = "2" ipnet = "2.10.0" itertools = "0.10" diff --git a/compute/compute-node.Dockerfile b/compute/compute-node.Dockerfile index 6e46185e36..d5483018b4 100644 --- a/compute/compute-node.Dockerfile +++ b/compute/compute-node.Dockerfile @@ -1735,6 +1735,8 @@ RUN 
set -e \ libevent-dev \ libtool \ pkg-config \ + libcurl4-openssl-dev \ + libssl-dev \ && apt clean && rm -rf /var/lib/apt/lists/* # Use `dist_man_MANS=` to skip manpage generation (which requires python3/pandoc) @@ -1743,7 +1745,7 @@ RUN set -e \ && git clone --recurse-submodules --depth 1 --branch ${PGBOUNCER_TAG} https://github.com/pgbouncer/pgbouncer.git pgbouncer \ && cd pgbouncer \ && ./autogen.sh \ - && ./configure --prefix=/usr/local/pgbouncer --without-openssl \ + && ./configure --prefix=/usr/local/pgbouncer \ && make -j $(nproc) dist_man_MANS= \ && make install dist_man_MANS= diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index dd2896714d..90951e7ddb 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -26,6 +26,7 @@ fail.workspace = true flate2.workspace = true futures.workspace = true http.workspace = true +indexmap.workspace = true jsonwebtoken.workspace = true metrics.workspace = true nix.workspace = true @@ -34,16 +35,19 @@ num_cpus.workspace = true once_cell.workspace = true opentelemetry.workspace = true opentelemetry_sdk.workspace = true +p256 = { version = "0.13", features = ["pem"] } postgres.workspace = true regex.workspace = true +reqwest = { workspace = true, features = ["json"] } +ring = "0.17" serde.workspace = true serde_with.workspace = true serde_json.workspace = true signal-hook.workspace = true +spki = { version = "0.7.3", features = ["std"] } tar.workspace = true tower.workspace = true tower-http.workspace = true -reqwest = { workspace = true, features = ["json"] } tokio = { workspace = true, features = ["rt", "rt-multi-thread"] } tokio-postgres.workspace = true tokio-util.workspace = true @@ -57,6 +61,7 @@ thiserror.workspace = true url.workspace = true uuid.workspace = true walkdir.workspace = true +x509-cert = { version = "0.2.5" } postgres_initdb.workspace = true compute_api.workspace = true diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index c2a3e38ed6..a0654ea0e4 
100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -41,6 +41,7 @@ use crate::rsyslog::configure_audit_rsyslog; use crate::spec::*; use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; +use crate::tls::watch_cert_for_changes; use crate::{config, extension_server, local_proxy}; pub static SYNC_SAFEKEEPERS_PID: AtomicU32 = AtomicU32::new(0); @@ -112,6 +113,7 @@ pub struct ComputeNode { // key: ext_archive_name, value: started download time, download_completed? pub ext_download_progress: RwLock, bool)>>, + pub compute_ctl_config: ComputeCtlConfig, } // store some metrics about download size that might impact startup time @@ -135,8 +137,6 @@ pub struct ComputeState { /// passed by the control plane with a /configure HTTP request. pub pspec: Option, - pub compute_ctl_config: ComputeCtlConfig, - /// If the spec is passed by a /configure request, 'startup_span' is the /// /configure request's tracing span. The main thread enters it when it /// processes the compute startup, so that the compute startup is considered @@ -160,7 +160,6 @@ impl ComputeState { last_active: None, error: None, pspec: None, - compute_ctl_config: ComputeCtlConfig::default(), startup_span: None, metrics: ComputeMetrics::default(), } @@ -314,7 +313,6 @@ impl ComputeNode { let pspec = ParsedSpec::try_from(cli_spec).map_err(|msg| anyhow::anyhow!(msg))?; new_state.pspec = Some(pspec); } - new_state.compute_ctl_config = compute_ctl_config; Ok(ComputeNode { params, @@ -323,6 +321,7 @@ impl ComputeNode { state: Mutex::new(new_state), state_changed: Condvar::new(), ext_download_progress: RwLock::new(HashMap::new()), + compute_ctl_config, }) } @@ -345,7 +344,7 @@ impl ComputeNode { // requests while configuration is still in progress. 
crate::http::server::Server::External { port: this.params.external_http_port, - jwks: this.state.lock().unwrap().compute_ctl_config.jwks.clone(), + config: this.compute_ctl_config.clone(), compute_id: this.params.compute_id.clone(), } .launch(&this); @@ -524,6 +523,16 @@ impl ComputeNode { // Collect all the tasks that must finish here let mut pre_tasks = tokio::task::JoinSet::new(); + // Make sure TLS certificates are properly loaded and in the right place. + if self.compute_ctl_config.tls.is_some() { + let this = self.clone(); + pre_tasks.spawn(async move { + this.watch_cert_for_changes().await; + + Ok::<(), anyhow::Error>(()) + }); + } + // If there are any remote extensions in shared_preload_libraries, start downloading them if pspec.spec.remote_extensions.is_some() { let (this, spec) = (self.clone(), pspec.spec.clone()); @@ -579,11 +588,13 @@ impl ComputeNode { if let Some(pgbouncer_settings) = &pspec.spec.pgbouncer_settings { info!("tuning pgbouncer"); + let pgbouncer_settings = pgbouncer_settings.clone(); + let tls_config = self.compute_ctl_config.tls.clone(); + // Spawn a background task to do the tuning, // so that we don't block the main thread that starts Postgres. - let pgbouncer_settings = pgbouncer_settings.clone(); let _handle = tokio::spawn(async move { - let res = tune_pgbouncer(pgbouncer_settings).await; + let res = tune_pgbouncer(pgbouncer_settings, tls_config).await; if let Err(err) = res { error!("error while tuning pgbouncer: {err:?}"); // Continue with the startup anyway @@ -1105,9 +1116,10 @@ impl ComputeNode { // Remove/create an empty pgdata directory and put configuration there. 
self.create_pgdata()?; config::write_postgres_conf( - &pgdata_path.join("postgresql.conf"), + pgdata_path, &pspec.spec, self.params.internal_http_port, + &self.compute_ctl_config.tls, )?; // Syncing safekeepers is only safe with primary nodes: if a primary @@ -1489,11 +1501,13 @@ impl ComputeNode { if let Some(ref pgbouncer_settings) = spec.pgbouncer_settings { info!("tuning pgbouncer"); + let pgbouncer_settings = pgbouncer_settings.clone(); + let tls_config = self.compute_ctl_config.tls.clone(); + // Spawn a background task to do the tuning, // so that we don't block the main thread that starts Postgres. - let pgbouncer_settings = pgbouncer_settings.clone(); tokio::spawn(async move { - let res = tune_pgbouncer(pgbouncer_settings).await; + let res = tune_pgbouncer(pgbouncer_settings, tls_config).await; if let Err(err) = res { error!("error while tuning pgbouncer: {err:?}"); } @@ -1505,7 +1519,8 @@ impl ComputeNode { // Spawn a background task to do the configuration, // so that we don't block the main thread that starts Postgres. 
- let local_proxy = local_proxy.clone(); + let mut local_proxy = local_proxy.clone(); + local_proxy.tls = self.compute_ctl_config.tls.clone(); tokio::spawn(async move { if let Err(err) = local_proxy::configure(&local_proxy) { error!("error while configuring local_proxy: {err:?}"); @@ -1515,8 +1530,12 @@ impl ComputeNode { // Write new config let pgdata_path = Path::new(&self.params.pgdata); - let postgresql_conf_path = pgdata_path.join("postgresql.conf"); - config::write_postgres_conf(&postgresql_conf_path, &spec, self.params.internal_http_port)?; + config::write_postgres_conf( + pgdata_path, + &spec, + self.params.internal_http_port, + &self.compute_ctl_config.tls, + )?; if !spec.skip_pg_catalog_updates { let max_concurrent_connections = spec.reconfigure_concurrency; @@ -1587,6 +1606,56 @@ impl ComputeNode { Ok(()) } + pub async fn watch_cert_for_changes(self: Arc) { + // update status on cert renewal + if let Some(tls_config) = &self.compute_ctl_config.tls { + let tls_config = tls_config.clone(); + + // wait until the cert exists. + let mut cert_watch = watch_cert_for_changes(tls_config.cert_path.clone()).await; + + tokio::task::spawn_blocking(move || { + let handle = tokio::runtime::Handle::current(); + 'cert_update: loop { + // let postgres/pgbouncer/local_proxy know the new cert/key exists. + // we need to wait until it's configurable first. 
+ + let mut state = self.state.lock().unwrap(); + 'status_update: loop { + match state.status { + // let's update the state to config pending + ComputeStatus::ConfigurationPending | ComputeStatus::Running => { + state.set_status( + ComputeStatus::ConfigurationPending, + &self.state_changed, + ); + break 'status_update; + } + + // exit loop + ComputeStatus::Failed + | ComputeStatus::TerminationPending + | ComputeStatus::Terminated => break 'cert_update, + + // wait + ComputeStatus::Init + | ComputeStatus::Configuration + | ComputeStatus::Empty => { + state = self.state_changed.wait(state).unwrap(); + } + } + } + drop(state); + + // wait for a new certificate update + if handle.block_on(cert_watch.changed()).is_err() { + break; + } + } + }); + } + } + /// Update the `last_active` in the shared state, but ensure that it's a more recent one. pub fn update_last_active(&self, last_active: Option>) { let mut state = self.state.lock().unwrap(); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index 0760568ff8..7aa7360f9d 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -6,11 +6,13 @@ use std::io::Write; use std::io::prelude::*; use std::path::Path; +use compute_api::responses::TlsConfig; use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption}; use crate::pg_helpers::{ GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value, }; +use crate::tls::{self, SERVER_CRT, SERVER_KEY}; /// Check that `line` is inside a text file and put it there if it is not. /// Create file if it doesn't exist. 
@@ -38,10 +40,12 @@ pub fn line_in_file(path: &Path, line: &str) -> Result { /// Create or completely rewrite configuration file specified by `path` pub fn write_postgres_conf( - path: &Path, + pgdata_path: &Path, spec: &ComputeSpec, extension_server_port: u16, + tls_config: &Option, ) -> Result<()> { + let path = pgdata_path.join("postgresql.conf"); // File::create() destroys the file content if it exists. let mut file = File::create(path)?; @@ -86,6 +90,20 @@ pub fn write_postgres_conf( )?; } + // tls + if let Some(tls_config) = tls_config { + writeln!(file, "ssl = on")?; + + // postgres requires the keyfile to be in a secure file, + // currently too complicated to ensure that at the VM level, + // so we just copy them to another file instead. :shrug: + tls::update_key_path_blocking(pgdata_path, tls_config); + + // these are the default, but good to be explicit. + writeln!(file, "ssl_cert_file = '{}'", SERVER_CRT)?; + writeln!(file, "ssl_key_file = '{}'", SERVER_KEY)?; + } + // Locales if cfg!(target_os = "macos") { writeln!(file, "lc_messages='C'")?; diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index b70b6c619c..10f767e97c 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -8,8 +8,8 @@ use axum::Router; use axum::middleware::{self}; use axum::response::IntoResponse; use axum::routing::{get, post}; +use compute_api::responses::ComputeCtlConfig; use http::StatusCode; -use jsonwebtoken::jwk::JwkSet; use tokio::net::TcpListener; use tower::ServiceBuilder; use tower_http::{ @@ -41,7 +41,7 @@ pub enum Server { }, External { port: u16, - jwks: JwkSet, + config: ComputeCtlConfig, compute_id: String, }, } @@ -79,7 +79,7 @@ impl From<&Server> for Router> { router } Server::External { - jwks, compute_id, .. + config, compute_id, .. 
} => { let unauthenticated_router = Router::>::new().route("/metrics", get(metrics::get_metrics)); @@ -95,7 +95,7 @@ impl From<&Server> for Router> { .route("/terminate", post(terminate::terminate)) .layer(AsyncRequireAuthorizationLayer::new(Authorize::new( compute_id.clone(), - jwks.clone(), + config.jwks.clone(), ))); router diff --git a/compute_tools/src/lib.rs b/compute_tools/src/lib.rs index 5c78bbcd02..a681fad0b0 100644 --- a/compute_tools/src/lib.rs +++ b/compute_tools/src/lib.rs @@ -26,3 +26,4 @@ pub mod spec; mod spec_apply; pub mod swap; pub mod sync_sk; +pub mod tls; diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index dd8d8e9b8b..802e3e93d9 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -10,8 +10,10 @@ use std::str::FromStr; use std::time::{Duration, Instant}; use anyhow::{Result, bail}; +use compute_api::responses::TlsConfig; use compute_api::spec::{Database, GenericOption, GenericOptions, PgIdent, Role}; use futures::StreamExt; +use indexmap::IndexMap; use ini::Ini; use notify::{RecursiveMode, Watcher}; use postgres::config::Config; @@ -406,7 +408,7 @@ pub fn create_pgdata(pgdata: &str) -> Result<()> { /// Update pgbouncer.ini with provided options fn update_pgbouncer_ini( - pgbouncer_config: HashMap, + pgbouncer_config: IndexMap, pgbouncer_ini_path: &str, ) -> Result<()> { let mut conf = Ini::load_from_file(pgbouncer_ini_path)?; @@ -427,7 +429,10 @@ fn update_pgbouncer_ini( /// Tune pgbouncer. /// 1. Apply new config using pgbouncer admin console /// 2. 
Add new values to pgbouncer.ini to preserve them after restart -pub async fn tune_pgbouncer(pgbouncer_config: HashMap) -> Result<()> { +pub async fn tune_pgbouncer( + mut pgbouncer_config: IndexMap, + tls_config: Option, +) -> Result<()> { let pgbouncer_connstr = if std::env::var_os("AUTOSCALING").is_some() { // for VMs use pgbouncer specific way to connect to // pgbouncer admin console without password @@ -473,19 +478,21 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap) -> Result } }; - // Apply new config - for (option_name, value) in pgbouncer_config.iter() { - let query = format!("SET {}={}", option_name, value); - // keep this log line for debugging purposes - info!("Applying pgbouncer setting change: {}", query); + if let Some(tls_config) = tls_config { + // pgbouncer starts in a half-ok state if it cannot find these files. + // It will default to client_tls_sslmode=deny, which causes proxy to error. + // There is a small window at startup where these files don't yet exist in the VM. + // Best to wait until it exists. 
+ loop { + if let Ok(true) = tokio::fs::try_exists(&tls_config.key_path).await { + break; + } + tokio::time::sleep(Duration::from_millis(500)).await + } - if let Err(err) = client.simple_query(&query).await { - // Don't fail on error, just print it into log - error!( - "Failed to apply pgbouncer setting change: {}, {}", - query, err - ); - }; + pgbouncer_config.insert("client_tls_cert_file".to_string(), tls_config.cert_path); + pgbouncer_config.insert("client_tls_key_file".to_string(), tls_config.key_path); + pgbouncer_config.insert("client_tls_sslmode".to_string(), "allow".to_string()); } // save values to pgbouncer.ini @@ -501,6 +508,13 @@ pub async fn tune_pgbouncer(pgbouncer_config: HashMap) -> Result }; update_pgbouncer_ini(pgbouncer_config, &pgbouncer_ini_path)?; + info!("Applying pgbouncer setting change"); + + if let Err(err) = client.simple_query("RELOAD").await { + // Don't fail on error, just print it into log + error!("Failed to apply pgbouncer setting change, {err}",); + }; + Ok(()) } diff --git a/compute_tools/src/tls.rs b/compute_tools/src/tls.rs new file mode 100644 index 0000000000..5a310d8ac4 --- /dev/null +++ b/compute_tools/src/tls.rs @@ -0,0 +1,118 @@ +use std::{io::Write, os::unix::fs::OpenOptionsExt, path::Path, time::Duration}; + +use anyhow::{Context, Result, bail}; +use compute_api::responses::TlsConfig; +use ring::digest; +use spki::ObjectIdentifier; +use spki::der::{Decode, PemReader}; +use x509_cert::Certificate; + +#[derive(Clone, Copy)] +pub struct CertDigest(digest::Digest); + +pub async fn watch_cert_for_changes(cert_path: String) -> tokio::sync::watch::Receiver { + let mut digest = compute_digest(&cert_path).await; + let (tx, rx) = tokio::sync::watch::channel(digest); + tokio::spawn(async move { + while !tx.is_closed() { + let new_digest = compute_digest(&cert_path).await; + if digest.0.as_ref() != new_digest.0.as_ref() { + digest = new_digest; + _ = tx.send(digest); + } + + tokio::time::sleep(Duration::from_secs(60)).await + } + 
}); + rx +} + +async fn compute_digest(cert_path: &str) -> CertDigest { + loop { + match try_compute_digest(cert_path).await { + Ok(d) => break d, + Err(e) => { + tracing::error!("could not read cert file {e:?}"); + tokio::time::sleep(Duration::from_secs(1)).await + } + } + } +} + +async fn try_compute_digest(cert_path: &str) -> Result { + let data = tokio::fs::read(cert_path).await?; + // sha256 is extremely collision resistent. can safely assume the digest to be unique + Ok(CertDigest(digest::digest(&digest::SHA256, &data))) +} + +pub const SERVER_CRT: &str = "server.crt"; +pub const SERVER_KEY: &str = "server.key"; + +pub fn update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) { + loop { + match try_update_key_path_blocking(pg_data, tls_config) { + Ok(()) => break, + Err(e) => { + tracing::error!("could not create key file {e:?}"); + std::thread::sleep(Duration::from_secs(1)) + } + } + } +} + +// Postgres requires the keypath be "secure". This means +// 1. Owned by the postgres user. +// 2. Have permission 600. +fn try_update_key_path_blocking(pg_data: &Path, tls_config: &TlsConfig) -> Result<()> { + let key = std::fs::read_to_string(&tls_config.key_path)?; + let crt = std::fs::read_to_string(&tls_config.cert_path)?; + + // to mitigate a race condition during renewal. + verify_key_cert(&key, &crt)?; + + let mut key_file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(pg_data.join(SERVER_KEY))?; + + let mut crt_file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .mode(0o600) + .open(pg_data.join(SERVER_CRT))?; + + key_file.write_all(key.as_bytes())?; + crt_file.write_all(crt.as_bytes())?; + + Ok(()) +} + +fn verify_key_cert(key: &str, cert: &str) -> Result<()> { + const ECDSA_WITH_SHA256: ObjectIdentifier = ObjectIdentifier::new_unwrap("1.2.840.10045.4.3.2"); + + let cert = Certificate::decode(&mut PemReader::new(cert.as_bytes()).context("pem reader")?) 
+ .context("decode cert")?; + + match cert.signature_algorithm.oid { + ECDSA_WITH_SHA256 => { + let key = p256::SecretKey::from_sec1_pem(key).context("parse key")?; + + let a = key.public_key().to_sec1_bytes(); + let b = cert + .tbs_certificate + .subject_public_key_info + .subject_public_key + .raw_bytes(); + + if *a != *b { + bail!("private key file does not match certificate") + } + } + _ => bail!("unknown TLS key type"), + } + + Ok(()) +} diff --git a/libs/compute_api/Cargo.toml b/libs/compute_api/Cargo.toml index 0d1618c1b2..81b0cd19a1 100644 --- a/libs/compute_api/Cargo.toml +++ b/libs/compute_api/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] anyhow.workspace = true chrono.workspace = true +indexmap.workspace = true jsonwebtoken.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/libs/compute_api/src/responses.rs b/libs/compute_api/src/responses.rs index 3300fbf7dd..c8f6019c5c 100644 --- a/libs/compute_api/src/responses.rs +++ b/libs/compute_api/src/responses.rs @@ -139,6 +139,7 @@ pub struct ComputeCtlConfig { /// Set of JSON web keys that the compute can use to authenticate /// communication from the control plane. pub jwks: JwkSet, + pub tls: Option, } impl Default for ComputeCtlConfig { @@ -147,10 +148,17 @@ impl Default for ComputeCtlConfig { jwks: JwkSet { keys: Vec::default(), }, + tls: None, } } } +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct TlsConfig { + pub key_path: String, + pub cert_path: String, +} + /// Response of the `/computes/{compute_id}/spec` control-plane API. #[derive(Deserialize, Debug)] pub struct ControlPlaneSpecResponse { diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index 77f2e1e631..af4264f8d2 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -5,12 +5,15 @@ //! and connect it to the storage nodes. 
use std::collections::HashMap; +use indexmap::IndexMap; use regex::Regex; use remote_storage::RemotePath; use serde::{Deserialize, Serialize}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; +use crate::responses::TlsConfig; + /// String type alias representing Postgres identifier and /// intended to be used for DB / role names. pub type PgIdent = String; @@ -125,7 +128,7 @@ pub struct ComputeSpec { // information about available remote extensions pub remote_extensions: Option, - pub pgbouncer_settings: Option>, + pub pgbouncer_settings: Option>, // Stripe size for pageserver sharding, in pages #[serde(default)] @@ -357,6 +360,9 @@ pub struct LocalProxySpec { #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] pub jwks: Option>, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub tls: Option, } #[derive(Clone, Debug, Deserialize, Serialize)] diff --git a/proxy/src/binary/local_proxy.rs b/proxy/src/binary/local_proxy.rs index dedd225cba..ee7f6ffcd7 100644 --- a/proxy/src/binary/local_proxy.rs +++ b/proxy/src/binary/local_proxy.rs @@ -5,6 +5,7 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{Context, bail, ensure}; +use arc_swap::ArcSwapOption; use camino::{Utf8Path, Utf8PathBuf}; use clap::Parser; use compute_api::spec::LocalProxySpec; @@ -27,6 +28,7 @@ use crate::config::{ }; use crate::control_plane::locks::ApiLocks; use crate::control_plane::messages::{EndpointJwksResponse, JwksSettings}; +use crate::ext::TaskExt; use crate::http::health_server::AppMetrics; use crate::intern::RoleNameInt; use crate::metrics::{Metrics, ThreadPoolMetrics}; @@ -190,7 +192,11 @@ pub async fn run() -> anyhow::Result<()> { // 2. The config file is written but the signal hook is not yet received // 3. local_proxy completes startup but has no config loaded, despite there being a registerd config. 
refresh_config_notify.notify_one(); - tokio::spawn(refresh_config_loop(args.config_path, refresh_config_notify)); + tokio::spawn(refresh_config_loop( + config, + args.config_path, + refresh_config_notify, + )); maintenance_tasks.spawn(crate::http::health_server::task_main( metrics_listener, @@ -269,7 +275,7 @@ fn build_config(args: &LocalProxyCliArgs) -> anyhow::Result<&'static ProxyConfig }; Ok(Box::leak(Box::new(ProxyConfig { - tls_config: None, + tls_config: ArcSwapOption::from(None), metric_collection: None, http_config, authentication_config: AuthenticationConfig { @@ -311,14 +317,16 @@ enum RefreshConfigError { Parse(#[from] serde_json::Error), #[error(transparent)] Validate(anyhow::Error), + #[error(transparent)] + Tls(anyhow::Error), } -async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { +async fn refresh_config_loop(config: &ProxyConfig, path: Utf8PathBuf, rx: Arc) { let mut init = true; loop { rx.notified().await; - match refresh_config_inner(&path).await { + match refresh_config_inner(config, &path).await { Ok(()) => {} // don't log for file not found errors if this is the first time we are checking // for computes that don't use local_proxy, this is not an error. 
@@ -327,6 +335,9 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { { debug!(error=?e, ?path, "could not read config file"); } + Err(RefreshConfigError::Tls(e)) => { + error!(error=?e, ?path, "could not read TLS certificates"); + } Err(e) => { error!(error=?e, ?path, "could not read config file"); } @@ -336,7 +347,10 @@ async fn refresh_config_loop(path: Utf8PathBuf, rx: Arc) { } } -async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> { +async fn refresh_config_inner( + config: &ProxyConfig, + path: &Utf8Path, +) -> Result<(), RefreshConfigError> { let bytes = tokio::fs::read(&path).await?; let data: LocalProxySpec = serde_json::from_slice(&bytes)?; @@ -406,5 +420,20 @@ async fn refresh_config_inner(path: &Utf8Path) -> Result<(), RefreshConfigError> info!("successfully loaded new config"); JWKS_ROLE_MAP.store(Some(Arc::new(EndpointJwksResponse { jwks: jwks_set }))); + if let Some(tls_config) = data.tls { + let tls_config = tokio::task::spawn_blocking(move || { + crate::tls::server_config::configure_tls( + &tls_config.key_path, + &tls_config.cert_path, + None, + false, + ) + }) + .await + .propagate_task_panic() + .map_err(RefreshConfigError::Tls)?; + config.tls_config.store(Some(Arc::new(tls_config))); + } + Ok(()) } diff --git a/proxy/src/binary/proxy.rs b/proxy/src/binary/proxy.rs index eec0bf8f99..feca5ccf88 100644 --- a/proxy/src/binary/proxy.rs +++ b/proxy/src/binary/proxy.rs @@ -4,6 +4,7 @@ use std::sync::Arc; use std::time::Duration; use anyhow::bail; +use arc_swap::ArcSwapOption; use futures::future::Either; use remote_storage::RemoteStorageConfig; use tokio::net::TcpListener; @@ -563,6 +564,7 @@ fn build_config(args: &ProxyCliArgs) -> anyhow::Result<&'static ProxyConfig> { (None, None) => None, _ => bail!("either both or neither tls-key and tls-cert must be specified"), }; + let tls_config = ArcSwapOption::from(tls_config.map(Arc::new)); let backup_metric_collection_config = config::MetricBackupCollectionConfig { 
remote_storage_config: args.metric_backup_collection_remote_storage.clone(), diff --git a/proxy/src/config.rs b/proxy/src/config.rs index 1bcd22e98f..ad398c122c 100644 --- a/proxy/src/config.rs +++ b/proxy/src/config.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{Context, Ok, bail, ensure}; +use arc_swap::ArcSwapOption; use clap::ValueEnum; use remote_storage::RemoteStorageConfig; @@ -17,7 +18,7 @@ pub use crate::tls::server_config::{TlsConfig, configure_tls}; use crate::types::Host; pub struct ProxyConfig { - pub tls_config: Option, + pub tls_config: ArcSwapOption, pub metric_collection: Option, pub http_config: HttpConfig, pub authentication_config: AuthenticationConfig, diff --git a/proxy/src/console_redirect_proxy.rs b/proxy/src/console_redirect_proxy.rs index 4662860b3f..1156545f34 100644 --- a/proxy/src/console_redirect_proxy.rs +++ b/proxy/src/console_redirect_proxy.rs @@ -177,7 +177,8 @@ pub(crate) async fn handle_client( let proto = ctx.protocol(); let request_gauge = metrics.connection_requests.guard(proto); - let tls = config.tls_config.as_ref(); + let tls = config.tls_config.load(); + let tls = tls.as_deref(); let record_handshake_error = !ctx.has_private_peer_addr(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client); diff --git a/proxy/src/proxy/handshake.rs b/proxy/src/proxy/handshake.rs index 955f754497..2582e4c069 100644 --- a/proxy/src/proxy/handshake.rs +++ b/proxy/src/proxy/handshake.rs @@ -114,7 +114,7 @@ pub(crate) async fn handshake( let mut read_buf = read_buf.reader(); let mut res = Ok(()); - let accept = tokio_rustls::TlsAcceptor::from(tls.to_server_config()) + let accept = tokio_rustls::TlsAcceptor::from(tls.pg_config.clone()) .accept_with(raw, |session| { // push the early data to the tls session while !read_buf.get_ref().is_empty() { diff --git a/proxy/src/proxy/mod.rs b/proxy/src/proxy/mod.rs index 0c6d352600..2e7d332a8b 100644 --- a/proxy/src/proxy/mod.rs +++ b/proxy/src/proxy/mod.rs @@ 
-278,7 +278,8 @@ pub(crate) async fn handle_client( let proto = ctx.protocol(); let request_gauge = metrics.connection_requests.guard(proto); - let tls = config.tls_config.as_ref(); + let tls = config.tls_config.load(); + let tls = tls.as_deref(); let record_handshake_error = !ctx.has_private_peer_addr(); let pause = ctx.latency_timer_pause(crate::metrics::Waiting::Client); diff --git a/proxy/src/proxy/tests/mod.rs b/proxy/src/proxy/tests/mod.rs index e0b7539538..2c3e70138d 100644 --- a/proxy/src/proxy/tests/mod.rs +++ b/proxy/src/proxy/tests/mod.rs @@ -96,16 +96,18 @@ fn generate_tls_config<'a>( .with_safe_default_protocol_versions() .context("ring should support the default protocol versions")? .with_no_client_auth() - .with_single_cert(vec![cert.clone()], key.clone_key())? - .into(); + .with_single_cert(vec![cert.clone()], key.clone_key())?; let mut cert_resolver = CertResolver::new(); cert_resolver.add_cert(key, vec![cert], true)?; let common_names = cert_resolver.get_common_names(); + let config = Arc::new(config); + TlsConfig { - config, + http_config: config.clone(), + pg_config: config, common_names, cert_resolver: Arc::new(cert_resolver), } diff --git a/proxy/src/serverless/mod.rs b/proxy/src/serverless/mod.rs index a7f46cbe58..00164d631a 100644 --- a/proxy/src/serverless/mod.rs +++ b/proxy/src/serverless/mod.rs @@ -19,6 +19,7 @@ use std::pin::{Pin, pin}; use std::sync::Arc; use anyhow::Context; +use arc_swap::ArcSwapOption; use async_trait::async_trait; use atomic_take::AtomicTake; use bytes::Bytes; @@ -117,18 +118,7 @@ pub async fn task_main( auth_backend, endpoint_rate_limiter: Arc::clone(&endpoint_rate_limiter), }); - let tls_acceptor: Arc = match config.tls_config.as_ref() { - Some(config) => { - let mut tls_server_config = rustls::ServerConfig::clone(&config.to_server_config()); - // prefer http2, but support http/1.1 - tls_server_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; - Arc::new(tls_server_config) - } - None => { - 
warn!("TLS config is missing"); - Arc::new(NoTls) - } - }; + let tls_acceptor: Arc = Arc::new(&config.tls_config); let connections = tokio_util::task::task_tracker::TaskTracker::new(); connections.close(); // allows `connections.wait to complete` @@ -216,22 +206,20 @@ pub(crate) type AsyncRW = Pin>; #[async_trait] trait MaybeTlsAcceptor: Send + Sync + 'static { - async fn accept(self: Arc, conn: ChainRW) -> std::io::Result; + async fn accept(&self, conn: ChainRW) -> std::io::Result; } #[async_trait] -impl MaybeTlsAcceptor for rustls::ServerConfig { - async fn accept(self: Arc, conn: ChainRW) -> std::io::Result { - Ok(Box::pin(TlsAcceptor::from(self).accept(conn).await?)) - } -} - -struct NoTls; - -#[async_trait] -impl MaybeTlsAcceptor for NoTls { - async fn accept(self: Arc, conn: ChainRW) -> std::io::Result { - Ok(Box::pin(conn)) +impl MaybeTlsAcceptor for &'static ArcSwapOption { + async fn accept(&self, conn: ChainRW) -> std::io::Result { + match &*self.load() { + Some(config) => Ok(Box::pin( + TlsAcceptor::from(config.http_config.clone()) + .accept(conn) + .await?, + )), + None => Ok(Box::pin(conn)), + } } } diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index 47009086c3..a79a478126 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -614,7 +614,9 @@ async fn handle_inner( &config.authentication_config, ctx, request.headers(), - config.tls_config.as_ref(), + // todo: race condition? + // we're unlikely to change the common names. 
+ config.tls_config.load().as_deref(), )?; info!( user = conn_info.conn_info.user_info.user.as_str(), diff --git a/proxy/src/tls/server_config.rs b/proxy/src/tls/server_config.rs index 903c0b712b..4cbd0474c2 100644 --- a/proxy/src/tls/server_config.rs +++ b/proxy/src/tls/server_config.rs @@ -9,17 +9,14 @@ use rustls::pki_types::{CertificateDer, PrivateKeyDer}; use super::{PG_ALPN_PROTOCOL, TlsServerEndPoint}; pub struct TlsConfig { - pub config: Arc, + // unfortunate split since we cannot change the ALPN on demand. + // + pub http_config: Arc, + pub pg_config: Arc, pub common_names: HashSet, pub cert_resolver: Arc, } -impl TlsConfig { - pub fn to_server_config(&self) -> Arc { - self.config.clone() - } -} - /// Configure TLS for the main endpoint. pub fn configure_tls( key_path: &str, @@ -71,8 +68,15 @@ pub fn configure_tls( config.key_log = Arc::new(rustls::KeyLogFile::new()); } + let mut http_config = config.clone(); + let mut pg_config = config; + + http_config.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; + pg_config.alpn_protocols = vec![b"postgresql".to_vec()]; + Ok(TlsConfig { - config: Arc::new(config), + http_config: Arc::new(http_config), + pg_config: Arc::new(pg_config), common_names, cert_resolver, }) diff --git a/workspace_hack/Cargo.toml b/workspace_hack/Cargo.toml index f1696c5ff9..6a726f0585 100644 --- a/workspace_hack/Cargo.toml +++ b/workspace_hack/Cargo.toml @@ -26,11 +26,14 @@ camino = { version = "1", default-features = false, features = ["serde1"] } chrono = { version = "0.4", default-features = false, features = ["clock", "serde", "wasmbind"] } clap = { version = "4", features = ["derive", "env", "string"] } clap_builder = { version = "4", default-features = false, features = ["color", "env", "help", "std", "string", "suggestions", "usage"] } +const-oid = { version = "0.9", default-features = false, features = ["db", "std"] } crypto-bigint = { version = "0.5", features = ["generic-array", "zeroize"] } -der = { version = "0.7", 
default-features = false, features = ["oid", "pem", "std"] } +der = { version = "0.7", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } deranged = { version = "0.3", default-features = false, features = ["powerfmt", "serde", "std"] } digest = { version = "0.10", features = ["mac", "oid", "std"] } +ecdsa = { version = "0.16", features = ["pem", "signing", "std", "verifying"] } either = { version = "1" } +elliptic-curve = { version = "0.13", default-features = false, features = ["digest", "hazmat", "jwk", "pem", "std"] } env_filter = { version = "0.1", default-features = false, features = ["regex"] } env_logger = { version = "0.11" } fail = { version = "0.5", default-features = false, features = ["failpoints"] } @@ -65,6 +68,7 @@ num-iter = { version = "0.1", default-features = false, features = ["i128", "std num-rational = { version = "0.4", default-features = false, features = ["num-bigint-std", "std"] } num-traits = { version = "0.2", features = ["i128", "libm"] } once_cell = { version = "1" } +p256 = { version = "0.13", features = ["jwk"] } parquet = { version = "53", default-features = false, features = ["zstd"] } prost = { version = "0.13", features = ["no-recursion-limit", "prost-derive"] } rand = { version = "0.8", features = ["small_rng"] } @@ -74,6 +78,7 @@ regex-syntax = { version = "0.8" } reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls", "rustls-tls-native-roots", "stream"] } rustls = { version = "0.23", default-features = false, features = ["logging", "ring", "std", "tls12"] } scopeguard = { version = "1" } +sec1 = { version = "0.7", features = ["pem", "serde", "std", "subtle"] } serde = { version = "1", features = ["alloc", "derive"] } serde_json = { version = "1", features = ["alloc", "raw_value"] } sha2 = { version = "0.10", features = ["asm", "oid"] } From ed31dd2a3c9cdb6dce6ea26e50a42be477e2a3a2 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 13 Mar 
2025 16:03:53 +0100 Subject: [PATCH 180/207] pageserver: better observability for slow wait_lsn (#11176) # Problem We leave too few observability breadcrumbs in the case where wait_lsn is exceptionally slow. # Changes - refactor: extract the monitoring logic out of `log_slow` into `monitor_slow_future` - add global + per-timeline counter for time spent waiting for wait_lsn - It is updated while we're still waiting, similar to what we do for page_service response flush. - add per-timeline counterpair for started & finished wait_lsn count - add slow-logging to leave breadcrumbs in logs, not just metrics For the slow-logging, we need to consider not flooding the logs during a broker or network outage/blip. The solution is a "log-streak-level" concurrency limit per timeline. At any given time, there is at most one slow wait_lsn that is logging the "still running" and "completed" sequence of logs. Other concurrent slow wait_lsn's don't log at all. This leaves at least one breadcrumb in each timeline's logs if some wait_lsn was exceptionally slow during a given period. The full degree of slowness can then be determined by looking at the per-timeline metric. # Performance Reran the `bench_log_slow` benchmark, no difference, so, existing call sites are fine. We do use a Semaphore, but only try_acquire it _after_ things have already been determined to be slow. So, no baseline overhead anticipated. 
# Refs - https://github.com/neondatabase/cloud/issues/23486#issuecomment-2711587222 --- libs/utils/benches/benchmarks.rs | 8 ++- libs/utils/src/logging.rs | 91 +++++++++++++++++++++++------- pageserver/src/metrics.rs | 92 ++++++++++++++++++++++++------- pageserver/src/page_service.rs | 19 +++++-- pageserver/src/tenant/timeline.rs | 63 +++++++++++++++++++-- test_runner/fixtures/metrics.py | 3 + 6 files changed, 227 insertions(+), 49 deletions(-) diff --git a/libs/utils/benches/benchmarks.rs b/libs/utils/benches/benchmarks.rs index 12c620ec87..35f3baaed1 100644 --- a/libs/utils/benches/benchmarks.rs +++ b/libs/utils/benches/benchmarks.rs @@ -49,7 +49,13 @@ pub fn bench_log_slow(c: &mut Criterion) { // performance too. Use a simple noop future that yields once, to avoid any scheduler fast // paths for a ready future. if enabled { - b.iter(|| runtime.block_on(log_slow("ready", THRESHOLD, tokio::task::yield_now()))); + b.iter(|| { + runtime.block_on(log_slow( + "ready", + THRESHOLD, + std::pin::pin!(tokio::task::yield_now()), + )) + }); } else { b.iter(|| runtime.block_on(tokio::task::yield_now())); } diff --git a/libs/utils/src/logging.rs b/libs/utils/src/logging.rs index f37f05692a..0ac8201795 100644 --- a/libs/utils/src/logging.rs +++ b/libs/utils/src/logging.rs @@ -331,37 +331,90 @@ impl std::fmt::Debug for SecretString { /// /// TODO: consider upgrading this to a warning, but currently it fires too often. #[inline] -pub async fn log_slow(name: &str, threshold: Duration, f: impl Future) -> O { - // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and - // won't fit on the stack. 
- let mut f = Box::pin(f); +pub async fn log_slow(name: &str, threshold: Duration, f: std::pin::Pin<&mut F>) -> O +where + F: Future, +{ + monitor_slow_future( + threshold, + threshold, // period = threshold + f, + |MonitorSlowFutureCallback { + ready, + is_slow, + elapsed_total, + elapsed_since_last_callback: _, + }| { + if !is_slow { + return; + } + if ready { + info!( + "slow {name} completed after {:.3}s", + elapsed_total.as_secs_f64() + ); + } else { + info!( + "slow {name} still running after {:.3}s", + elapsed_total.as_secs_f64() + ); + } + }, + ) + .await +} +/// Poll future `fut` to completion, invoking callback `cb` at the given `threshold` and every +/// `period` afterwards, and also unconditionally when the future completes. +#[inline] +pub async fn monitor_slow_future( + threshold: Duration, + period: Duration, + mut fut: std::pin::Pin<&mut F>, + mut cb: impl FnMut(MonitorSlowFutureCallback), +) -> O +where + F: Future, +{ let started = Instant::now(); let mut attempt = 1; - + let mut last_cb = started; loop { // NB: use timeout_at() instead of timeout() to avoid an extra clock reading in the common // case where the timeout doesn't fire. - let deadline = started + attempt * threshold; - if let Ok(output) = tokio::time::timeout_at(deadline, &mut f).await { - // NB: we check if we exceeded the threshold even if the timeout never fired, because - // scheduling or execution delays may cause the future to succeed even if it exceeds the - // timeout. This costs an extra unconditional clock reading, but seems worth it to avoid - // false negatives. - let elapsed = started.elapsed(); - if elapsed >= threshold { - info!("slow {name} completed after {:.3}s", elapsed.as_secs_f64()); - } + let deadline = started + threshold + (attempt - 1) * period; + // TODO: still call the callback if the future panics? Copy how we do it for the page_service flush_in_progress counter. 
+ let res = tokio::time::timeout_at(deadline, &mut fut).await; + let now = Instant::now(); + let elapsed_total = now - started; + cb(MonitorSlowFutureCallback { + ready: res.is_ok(), + is_slow: elapsed_total >= threshold, + elapsed_total, + elapsed_since_last_callback: now - last_cb, + }); + last_cb = now; + if let Ok(output) = res { return output; } - - let elapsed = started.elapsed().as_secs_f64(); - info!("slow {name} still running after {elapsed:.3}s",); - attempt += 1; } } +/// See [`monitor_slow_future`]. +pub struct MonitorSlowFutureCallback { + /// Whether the future completed. If true, there will be no more callbacks. + pub ready: bool, + /// Whether the future is taking `>=` the specified threshold duration to complete. + /// Monotonic: if true in one callback invocation, true in all subsequent ones. + pub is_slow: bool, + /// The time elapsed since the [`monitor_slow_future`] was first polled. + pub elapsed_total: Duration, + /// The time elapsed since the last callback invocation. + /// For the initial callback invocation, the time elapsed since the [`monitor_slow_future`] was first polled. + pub elapsed_since_last_callback: Duration, +} + #[cfg(test)] mod tests { use metrics::IntCounterVec; diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index fd90ef8cd7..f7afaae068 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -465,12 +465,40 @@ pub(crate) fn page_cache_errors_inc(error_kind: PageCacheErrorKind) { pub(crate) static WAIT_LSN_TIME: Lazy = Lazy::new(|| { register_histogram!( "pageserver_wait_lsn_seconds", - "Time spent waiting for WAL to arrive", + "Time spent waiting for WAL to arrive. 
Updated on completion of the wait_lsn operation.", CRITICAL_OP_BUCKETS.into(), ) .expect("failed to define a metric") }); +pub(crate) static WAIT_LSN_START_FINISH_COUNTERPAIR: Lazy = Lazy::new(|| { + register_int_counter_pair_vec!( + "pageserver_wait_lsn_started_count", + "Number of wait_lsn operations started.", + "pageserver_wait_lsn_finished_count", + "Number of wait_lsn operations finished.", + &["tenant_id", "shard_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + +pub(crate) static WAIT_LSN_IN_PROGRESS_MICROS: Lazy = Lazy::new(|| { + register_int_counter_vec!( + "pageserver_wait_lsn_in_progress_micros", + "Time spent waiting for WAL to arrive, by timeline_id. Updated periodically while waiting.", + &["tenant_id", "shard_id", "timeline_id"], + ) + .expect("failed to define a metric") +}); + +pub(crate) static WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS: Lazy = Lazy::new(|| { + register_int_counter!( + "pageserver_wait_lsn_in_progress_micros_global", + "Time spent waiting for WAL to arrive, globally. Updated periodically while waiting." 
+ ) + .expect("failed to define a metric") +}); + static FLUSH_WAIT_UPLOAD_TIME: Lazy = Lazy::new(|| { register_gauge_vec!( "pageserver_flush_wait_upload_seconds", @@ -2830,7 +2858,6 @@ impl StorageTimeMetrics { } } -#[derive(Debug)] pub(crate) struct TimelineMetrics { tenant_id: String, shard_id: String, @@ -2863,6 +2890,8 @@ pub(crate) struct TimelineMetrics { pub valid_lsn_lease_count_gauge: UIntGauge, pub wal_records_received: IntCounter, pub storage_io_size: StorageIoSizeMetrics, + pub wait_lsn_in_progress_micros: GlobalAndPerTenantIntCounter, + pub wait_lsn_start_finish_counterpair: IntCounterPair, shutdown: std::sync::atomic::AtomicBool, } @@ -3000,6 +3029,17 @@ impl TimelineMetrics { let storage_io_size = StorageIoSizeMetrics::new(&tenant_id, &shard_id, &timeline_id); + let wait_lsn_in_progress_micros = GlobalAndPerTenantIntCounter { + global: WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS.clone(), + per_tenant: WAIT_LSN_IN_PROGRESS_MICROS + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(), + }; + + let wait_lsn_start_finish_counterpair = WAIT_LSN_START_FINISH_COUNTERPAIR + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); + TimelineMetrics { tenant_id, shard_id, @@ -3032,6 +3072,8 @@ impl TimelineMetrics { storage_io_size, valid_lsn_lease_count_gauge, wal_records_received, + wait_lsn_in_progress_micros, + wait_lsn_start_finish_counterpair, shutdown: std::sync::atomic::AtomicBool::default(), } } @@ -3224,6 +3266,15 @@ impl TimelineMetrics { let _ = STORAGE_IO_SIZE.remove_label_values(&[op, tenant_id, shard_id, timeline_id]); } + let _ = + WAIT_LSN_IN_PROGRESS_MICROS.remove_label_values(&[tenant_id, shard_id, timeline_id]); + + { + let mut res = [Ok(()), Ok(())]; + WAIT_LSN_START_FINISH_COUNTERPAIR + .remove_label_values(&mut res, &[tenant_id, shard_id, timeline_id]); + } + let _ = SMGR_QUERY_STARTED_PER_TENANT_TIMELINE.remove_label_values(&[ SmgrQueryType::GetPageAtLsn.into(), tenant_id, @@ -3836,27 
+3887,29 @@ pub mod tokio_epoll_uring { }); } +pub(crate) struct GlobalAndPerTenantIntCounter { + global: IntCounter, + per_tenant: IntCounter, +} + +impl GlobalAndPerTenantIntCounter { + #[inline(always)] + pub(crate) fn inc(&self) { + self.inc_by(1) + } + #[inline(always)] + pub(crate) fn inc_by(&self, n: u64) { + self.global.inc_by(n); + self.per_tenant.inc_by(n); + } +} + pub(crate) mod tenant_throttling { - use metrics::{IntCounter, register_int_counter_vec}; + use metrics::register_int_counter_vec; use once_cell::sync::Lazy; use utils::shard::TenantShardId; - pub(crate) struct GlobalAndPerTenantIntCounter { - global: IntCounter, - per_tenant: IntCounter, - } - - impl GlobalAndPerTenantIntCounter { - #[inline(always)] - pub(crate) fn inc(&self) { - self.inc_by(1) - } - #[inline(always)] - pub(crate) fn inc_by(&self, n: u64) { - self.global.inc_by(n); - self.per_tenant.inc_by(n); - } - } + use super::GlobalAndPerTenantIntCounter; pub(crate) struct Metrics { pub(super) count_accounted_start: GlobalAndPerTenantIntCounter, @@ -4102,6 +4155,7 @@ pub fn preinitialize_metrics(conf: &'static PageServerConf) { &CIRCUIT_BREAKERS_BROKEN, &CIRCUIT_BREAKERS_UNBROKEN, &PAGE_SERVICE_SMGR_FLUSH_INPROGRESS_MICROS_GLOBAL, + &WAIT_LSN_IN_PROGRESS_GLOBAL_MICROS, ] .into_iter() .for_each(|c| { diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index f2d2ab05ad..94571cbaaa 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -1106,12 +1106,19 @@ impl PageServerHandler { }; // Dispatch the batch to the appropriate request handler. - let (mut handler_results, span) = log_slow( - batch.as_static_str(), - LOG_SLOW_GETPAGE_THRESHOLD, - self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx), - ) - .await?; + let log_slow_name = batch.as_static_str(); + let (mut handler_results, span) = { + // TODO: we unfortunately have to pin the future on the heap, since GetPage futures are huge and + // won't fit on the stack. 
+ let mut boxpinned = + Box::pin(self.pagestream_dispatch_batched_message(batch, io_concurrency, ctx)); + log_slow( + log_slow_name, + LOG_SLOW_GETPAGE_THRESHOLD, + boxpinned.as_mut(), + ) + .await? + }; // We purposefully don't count flush time into the smgr operation timer. // diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 61542409f7..6cca8cc407 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -67,6 +67,7 @@ use tracing::*; use utils::generation::Generation; use utils::guard_arc_swap::GuardArcSwap; use utils::id::TimelineId; +use utils::logging::{MonitorSlowFutureCallback, monitor_slow_future}; use utils::lsn::{AtomicLsn, Lsn, RecordLsn}; use utils::postgres_client::PostgresClientProtocol; use utils::rate_limit::RateLimit; @@ -439,6 +440,8 @@ pub struct Timeline { heatmap_layers_downloader: Mutex>, pub(crate) rel_size_v2_status: ArcSwapOption, + + wait_lsn_log_slow: tokio::sync::Semaphore, } pub(crate) enum PreviousHeatmap { @@ -1479,17 +1482,67 @@ impl Timeline { WaitLsnTimeout::Default => self.conf.wait_lsn_timeout, }; - let _timer = crate::metrics::WAIT_LSN_TIME.start_timer(); + let timer = crate::metrics::WAIT_LSN_TIME.start_timer(); + let start_finish_counterpair_guard = self.metrics.wait_lsn_start_finish_counterpair.guard(); - match self.last_record_lsn.wait_for_timeout(lsn, timeout).await { + let wait_for_timeout = self.last_record_lsn.wait_for_timeout(lsn, timeout); + let wait_for_timeout = std::pin::pin!(wait_for_timeout); + // Use threshold of 1 because even 1 second of wait for ingest is very much abnormal. + let log_slow_threshold = Duration::from_secs(1); + // Use period of 10 to avoid flooding logs during an outage that affects all timelines. 
+ let log_slow_period = Duration::from_secs(10); + let mut logging_permit = None; + let wait_for_timeout = monitor_slow_future( + log_slow_threshold, + log_slow_period, + wait_for_timeout, + |MonitorSlowFutureCallback { + ready, + is_slow, + elapsed_total, + elapsed_since_last_callback, + }| { + self.metrics + .wait_lsn_in_progress_micros + .inc_by(u64::try_from(elapsed_since_last_callback.as_micros()).unwrap()); + if !is_slow { + return; + } + // It's slow, see if we should log it. + // (We limit the logging to one per invocation per timeline to avoid excessive + // logging during an extended broker / networking outage that affects all timelines.) + if logging_permit.is_none() { + logging_permit = self.wait_lsn_log_slow.try_acquire().ok(); + } + if logging_permit.is_none() { + return; + } + // We log it. + if ready { + info!( + "slow wait_lsn completed after {:.3}s", + elapsed_total.as_secs_f64() + ); + } else { + info!( + "slow wait_lsn still running for {:.3}s", + elapsed_total.as_secs_f64() + ); + } + }, + ); + let res = wait_for_timeout.await; + // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo + drop(logging_permit); + drop(start_finish_counterpair_guard); + drop(timer); + match res { Ok(()) => Ok(()), Err(e) => { use utils::seqwait::SeqWaitError::*; match e { Shutdown => Err(WaitLsnError::Shutdown), Timeout => { - // don't count the time spent waiting for lock below, and also in walreceiver.status(), towards the wait_lsn_time_histo - drop(_timer); let walreceiver_status = self.walreceiver_status(); Err(WaitLsnError::Timeout(format!( "Timed out while waiting for WAL record at LSN {} to arrive, last_record_lsn {} disk consistent LSN={}, WalReceiver status: {}", @@ -2821,6 +2874,8 @@ impl Timeline { heatmap_layers_downloader: Mutex::new(None), rel_size_v2_status: ArcSwapOption::from_pointee(rel_size_v2_status), + + wait_lsn_log_slow: tokio::sync::Semaphore::new(1), }; 
result.repartition_threshold = diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 83a1a87611..54e6458ac6 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -175,6 +175,9 @@ PAGESERVER_PER_TENANT_METRICS: tuple[str, ...] = ( counter("pageserver_tenant_throttling_count"), counter("pageserver_timeline_wal_records_received"), counter("pageserver_page_service_pagestream_flush_in_progress_micros"), + counter("pageserver_wait_lsn_in_progress_micros"), + counter("pageserver_wait_lsn_started_count"), + counter("pageserver_wait_lsn_finished_count"), *histogram("pageserver_page_service_batch_size"), *histogram("pageserver_page_service_pagestream_batch_wait_time_seconds"), *PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS, From 3c3b9dc9197dc43cce8bd24dfe4feab179ac118c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 17:28:21 +0100 Subject: [PATCH 181/207] pageserver: enable `image_creation_preempt_threshold` by default (#11216) ## Problem This is already set in production, we should harmonize the default. ## Summary of changes Default `image_creation_preempt_threshold` to 3. --- libs/pageserver_api/src/config.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index 6e457823dd..c7d33d8a04 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -587,9 +587,8 @@ pub mod tenant_conf_defaults { pub const DEFAULT_GC_PERIOD: &str = "1 hr"; pub const DEFAULT_IMAGE_CREATION_THRESHOLD: usize = 3; // If there are more than threshold * compaction_threshold (that is 3 * 10 in the default config) L0 layers, image - // layer creation will end immediately. Set to 0 to disable. The target default will be 3 once we - // want to enable this feature. - pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 0; + // layer creation will end immediately. Set to 0 to disable. 
+ pub const DEFAULT_IMAGE_CREATION_PREEMPT_THRESHOLD: usize = 3; pub const DEFAULT_PITR_INTERVAL: &str = "7 days"; pub const DEFAULT_WALRECEIVER_CONNECT_TIMEOUT: &str = "10 seconds"; pub const DEFAULT_WALRECEIVER_LAGGING_WAL_TIMEOUT: &str = "10 seconds"; From 398d2794ebb63bb80ff3c896a1cd84878ef4c821 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Thu, 13 Mar 2025 18:30:32 +0200 Subject: [PATCH 182/207] Handle DEBUG_COMPARE_LOCAL mode in neon_zeroextend (#11220) ## Problem DEBUG_COMPARE_LOCAL is not supported in neon_zeroextend added in PG16 ## Summary of changes Add support of DEBUG_COMPARE_LOCAL in neon_zeroextend Co-authored-by: Konstantin Knizhnik --- pgxn/neon/pagestore_smgr.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 1135212e22..6fe95df3dd 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -2898,6 +2898,11 @@ neon_zeroextend(SMgrRelation reln, ForkNumber forkNum, BlockNumber blocknum, relpath(reln->smgr_rlocator, forkNum), InvalidBlockNumber))); +#ifdef DEBUG_COMPARE_LOCAL + if (IS_LOCAL_REL(reln)) + mdzeroextend(reln, forkNum, blocknum, nblocks, skipFsync); +#endif + /* Don't log any pages if we're not allowed to do so. */ if (!XLogInsertAllowed()) return; From 066b0a1be91bbf21984d20de9931ca0f650f441f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Thu, 13 Mar 2025 19:18:55 +0100 Subject: [PATCH 183/207] fix(ci): correctly push neon-test-extensions in releases and to ghcr (#11225) ## Problem ef0d4a48a adjusted how we build container images and how we push them, and the neon-test-extensions image was overlooked. Additionally, it was also missed in 1f0dea9a1, which pushed our container images to GHCR. ## Summary of changes Push neon-test-extensions to GHCR and also push release tags for it. 
--- .github/workflows/_meta.yml | 2 +- .github/workflows/build_and_test.yml | 49 +++++++++++++++++++++++++--- 2 files changed, 45 insertions(+), 6 deletions(-) diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index f029385980..9e49c1ebc8 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -125,5 +125,5 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} run: | - RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Faied to find Build and Test run from RC PR!" | halt_error(1))') + RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Falied to find Build and Test run from RC PR!" | halt_error(1))') echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e1ad972a61..409ad6be3d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -978,16 +978,55 @@ jobs: acr-registry-name: ${{ vars.AZURE_PROD_REGISTRY_NAME }} secrets: inherit - # This is a bit of a special case so we're not using a generated image map. 
- add-latest-tag-to-neon-extensions-test-image: - if: github.ref_name == 'main' + push-neon-test-extensions-image-ghcr: + if: ${{ contains(fromJSON('["push-main", "pr", "compute-rc-pr"]'), needs.meta.outputs.run-kind) }} needs: [ meta, compute-node-image ] uses: ./.github/workflows/_push-to-container-registry.yml with: image-map: | { - "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v16:latest"], - "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": ["docker.io/neondatabase/neon-test-extensions-v17:latest"] + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [ + "ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}" + ], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [ + "ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}" + ] + } + secrets: inherit + + add-latest-tag-to-neon-test-extensions-image: + if: ${{ needs.meta.outputs.run-kind == 'push-main' }} + needs: [ meta, compute-node-image ] + uses: ./.github/workflows/_push-to-container-registry.yml + with: + image-map: | + { + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}": [ + "docker.io/neondatabase/neon-test-extensions-v16:latest", + "ghcr.io/neondatabase/neon-test-extensions-v16:latest" + ], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}": [ + "docker.io/neondatabase/neon-test-extensions-v17:latest", + "ghcr.io/neondatabase/neon-test-extensions-v17:latest" + ] + } + secrets: inherit + + add-release-tag-to-neon-test-extensions-image: + if: ${{ needs.meta.outputs.run-kind == 'compute-release' }} + needs: [ meta, compute-node-image ] + uses: ./.github/workflows/_push-to-container-registry.yml + with: + image-map: | + { + "docker.io/neondatabase/neon-test-extensions-v16:${{ 
needs.meta.outputs.release-pr-run-id }}": [ + "docker.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}", + "ghcr.io/neondatabase/neon-test-extensions-v16:${{ needs.meta.outputs.build-tag }}" + ], + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.release-pr-run-id }}": [ + "docker.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}", + "ghcr.io/neondatabase/neon-test-extensions-v17:${{ needs.meta.outputs.build-tag }}" + ] } secrets: inherit From 8afae9d03c13141d2b0c6ebcc2985949e838f5f7 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 20:15:22 +0100 Subject: [PATCH 184/207] pageserver: enable `l0_flush_delay_threshold` by default (#11214) ## Problem `l0_flush_delay_threshold` has already been set to 30 in production for a couple of weeks. Let's harmonize the default. ## Summary of changes Update `DEFAULT_L0_FLUSH_DELAY_FACTOR` to 3 such that the default `l0_flush_delay_threshold` is `3 * compaction_threshold`. This differs from the production setting, which is hardcoded to 30 (with `compaction_threshold` at 10), and is more appropriate for any tenants that have custom `compaction_threshold` overrides. --- libs/pageserver_api/src/config.rs | 8 ++++---- pageserver/src/tenant/timeline.rs | 10 ++++++---- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index c7d33d8a04..e112a57c9d 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -278,10 +278,10 @@ pub struct TenantConfigToml { /// If true, use a separate semaphore (i.e. concurrency limit) for the L0 compaction pass. Only /// has an effect if `compaction_l0_first` is true. Defaults to true. 
pub compaction_l0_semaphore: bool, - /// Level0 delta layer threshold at which to delay layer flushes for compaction backpressure, - /// such that they take 2x as long, and start waiting for layer flushes during ephemeral layer - /// rolls. This helps compaction keep up with WAL ingestion, and avoids read amplification - /// blowing up. Should be >compaction_threshold. 0 to disable. Disabled by default. + /// Level0 delta layer threshold at which to delay layer flushes such that they take 2x as long, + /// and block on layer flushes during ephemeral layer rolls, for compaction backpressure. This + /// helps compaction keep up with WAL ingestion, and avoids read amplification blowing up. + /// Should be >compaction_threshold. 0 to disable. Defaults to 3x compaction_threshold. pub l0_flush_delay_threshold: Option, /// Level0 delta layer threshold at which to stall layer flushes. Must be >compaction_threshold /// to avoid deadlock. 0 to disable. Disabled by default. diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 6cca8cc407..be861a0c89 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2476,8 +2476,9 @@ impl Timeline { } fn get_l0_flush_delay_threshold(&self) -> Option { - // Disable L0 flushes by default. This and compaction needs further tuning. - const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 0; // TODO: default to e.g. 3 + // By default, delay L0 flushes at 3x the compaction threshold. The compaction threshold + // defaults to 10, and L0 compaction is generally able to keep L0 counts below 30. + const DEFAULT_L0_FLUSH_DELAY_FACTOR: usize = 3; // If compaction is disabled, don't delay. if self.get_compaction_period() == Duration::ZERO { @@ -2505,8 +2506,9 @@ impl Timeline { } fn get_l0_flush_stall_threshold(&self) -> Option { - // Disable L0 stalls by default. 
In ingest benchmarks, we see image compaction take >10 - // minutes, blocking L0 compaction, and we can't stall L0 flushes for that long. + // Disable L0 stalls by default. Stalling can cause unavailability if L0 compaction isn't + // responsive, and it can e.g. block on other compaction via the compaction semaphore or + // sibling timelines. We need more confidence before enabling this. const DEFAULT_L0_FLUSH_STALL_FACTOR: usize = 0; // TODO: default to e.g. 5 // If compaction is disabled, don't stall. From b1a1be6a4cb91d6a5af9f643a16f786eeb128c17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 13 Mar 2025 20:50:52 +0100 Subject: [PATCH 185/207] switch pytests and neon_local to control_plane_hooks_api (#11195) We want to switch away from and deprecate the `--compute-hook-url` param for the storcon in favour of `--control-plane-url` because it allows us to construct urls with `notify-safekeepers`. This PR switches the pytests and neon_local from a `control_plane_compute_hook_api` to a new param named `control_plane_hooks_api` which is supposed to point to the parent of the `notify-attach` URL. We still support reading the old url from disk to not be too disruptive with existing deployments, but we just ignore it. Also add docs for the `notify-safekeepers` upcall API. 
Follow-up of #11173 Part of https://github.com/neondatabase/neon/issues/11163 --- control_plane/src/bin/neon_local.rs | 2 +- control_plane/src/local_env.rs | 19 ++++--- control_plane/src/storage_controller.rs | 6 +- docs/storage_controller.md | 57 ++++++++++++++++--- test_runner/fixtures/compute_reconfigure.py | 2 +- test_runner/fixtures/neon_fixtures.py | 8 +-- .../test_storage_controller_scale.py | 4 +- test_runner/regress/test_change_pageserver.py | 4 +- .../regress/test_pageserver_secondary.py | 4 +- test_runner/regress/test_sharding.py | 8 +-- .../regress/test_storage_controller.py | 20 +++---- 11 files changed, 85 insertions(+), 49 deletions(-) diff --git a/control_plane/src/bin/neon_local.rs b/control_plane/src/bin/neon_local.rs index 72ebbafd3b..747268f80b 100644 --- a/control_plane/src/bin/neon_local.rs +++ b/control_plane/src/bin/neon_local.rs @@ -979,7 +979,7 @@ fn handle_init(args: &InitCmdArgs) -> anyhow::Result { neon_distrib_dir: None, default_tenant_id: TenantId::from_array(std::array::from_fn(|_| 0)), storage_controller: None, - control_plane_compute_hook_api: None, + control_plane_hooks_api: None, generate_local_ssl_certs: false, } }; diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index ec9eb74e6f..2e57236ddb 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -72,9 +72,9 @@ pub struct LocalEnv { // be propagated into each pageserver's configuration. pub control_plane_api: Url, - // Control plane upcall API for storage controller. If set, this will be propagated into the + // Control plane upcall APIs for storage controller. If set, this will be propagated into the // storage controller's configuration. - pub control_plane_compute_hook_api: Option, + pub control_plane_hooks_api: Option, /// Keep human-readable aliases in memory (and persist them to config), to hide ZId hex strings from the user. 
// A `HashMap>` would be more appropriate here, @@ -104,6 +104,7 @@ pub struct OnDiskConfig { pub pageservers: Vec, pub safekeepers: Vec, pub control_plane_api: Option, + pub control_plane_hooks_api: Option, pub control_plane_compute_hook_api: Option, branch_name_mappings: HashMap>, // Note: skip serializing because in compat tests old storage controller fails @@ -136,7 +137,7 @@ pub struct NeonLocalInitConf { pub pageservers: Vec, pub safekeepers: Vec, pub control_plane_api: Option, - pub control_plane_compute_hook_api: Option>, + pub control_plane_hooks_api: Option, pub generate_local_ssl_certs: bool, } @@ -573,7 +574,8 @@ impl LocalEnv { pageservers, safekeepers, control_plane_api, - control_plane_compute_hook_api, + control_plane_hooks_api, + control_plane_compute_hook_api: _, branch_name_mappings, generate_local_ssl_certs, } = on_disk_config; @@ -588,7 +590,7 @@ impl LocalEnv { pageservers, safekeepers, control_plane_api: control_plane_api.unwrap(), - control_plane_compute_hook_api, + control_plane_hooks_api, branch_name_mappings, generate_local_ssl_certs, } @@ -695,7 +697,8 @@ impl LocalEnv { pageservers: vec![], // it's skip_serializing anyway safekeepers: self.safekeepers.clone(), control_plane_api: Some(self.control_plane_api.clone()), - control_plane_compute_hook_api: self.control_plane_compute_hook_api.clone(), + control_plane_hooks_api: self.control_plane_hooks_api.clone(), + control_plane_compute_hook_api: None, branch_name_mappings: self.branch_name_mappings.clone(), generate_local_ssl_certs: self.generate_local_ssl_certs, }, @@ -779,8 +782,8 @@ impl LocalEnv { pageservers, safekeepers, control_plane_api, - control_plane_compute_hook_api, generate_local_ssl_certs, + control_plane_hooks_api, } = conf; // Find postgres binaries. 
@@ -827,7 +830,7 @@ impl LocalEnv { pageservers: pageservers.iter().map(Into::into).collect(), safekeepers, control_plane_api: control_plane_api.unwrap(), - control_plane_compute_hook_api: control_plane_compute_hook_api.unwrap_or_default(), + control_plane_hooks_api, branch_name_mappings: Default::default(), generate_local_ssl_certs, }; diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index bbd7f67720..e28fd70fdf 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -558,10 +558,8 @@ impl StorageController { args.push(format!("--public-key=\"{public_key}\"")); } - if let Some(control_plane_compute_hook_api) = &self.env.control_plane_compute_hook_api { - args.push(format!( - "--compute-hook-url={control_plane_compute_hook_api}" - )); + if let Some(control_plane_hooks_api) = &self.env.control_plane_hooks_api { + args.push(format!("--control-plane-url={control_plane_hooks_api}")); } if let Some(split_threshold) = self.config.split_threshold.as_ref() { diff --git a/docs/storage_controller.md b/docs/storage_controller.md index cf00cd8e33..ac4aca4219 100644 --- a/docs/storage_controller.md +++ b/docs/storage_controller.md @@ -101,15 +101,25 @@ changes such as a pageserver node becoming unavailable, or the tenant's shard co postgres clients to handle such changes, the storage controller calls an API hook when a tenant's pageserver location changes. -The hook is configured using the storage controller's `--control-plane-url` CLI option. If the hook requires -JWT auth, the token may be provided with `--control-plane-jwt-token`. The hook will be invoked with a `PUT` request. +The hook is configured using the storage controller's `--control-plane-url` CLI option, from which the hook URL is computed. -In the Neon cloud service, this hook is implemented by Neon's internal cloud control plane. 
In `neon_local` systems +Currently, there are two hooks, each computed by appending the name to the provided control plane URL prefix: + +- `notify-attach`, called whenever attachment for pageservers changes +- `notify-safekeepers`, called whenever attachment for safekeepers changes + +If the hooks require JWT auth, the token may be provided with `--control-plane-jwt-token`. +The hooks will be invoked with a `PUT` request. + +In the Neon cloud service, these hooks are implemented by Neon's internal cloud control plane. In `neon_local` systems, the storage controller integrates directly with neon_local to reconfigure local postgres processes instead of calling the compute hook. -When implementing an on-premise Neon deployment, you must implement a service that handles the compute hook. This is not complicated: -the request body has format of the `ComputeHookNotifyRequest` structure, provided below for convenience. +When implementing an on-premise Neon deployment, you must implement a service that handles the compute hooks. This is not complicated. + +### `notify-attach` body + +The `notify-attach` request body follows the format of the `ComputeHookNotifyRequest` structure, provided below for convenience. ``` struct ComputeHookNotifyRequestShard { @@ -128,15 +138,15 @@ When a notification is received: 1. Modify postgres configuration for this tenant: - - set `neon.pageserver_connstr` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The + - set `neon.pageserver_connstring` to a comma-separated list of postgres connection strings to pageservers according to the `shards` list. The shards identified by `NodeId` must be converted to the address+port of the node. - - if stripe_size is not None, set `neon.stripe_size` to this value + - if stripe_size is not None, set `neon.shard_stripe_size` to this value 2. Send SIGHUP to postgres to reload configuration 3. Respond with 200 to the notification request. 
Do not return success if postgres was not updated: if an error is returned, the controller will retry the notification until it succeeds.. -### Example notification body +Example body: ``` { @@ -148,3 +158,34 @@ When a notification is received: ], } ``` + +### `notify-safekeepers` body + +The `notify-safekeepers` request body follows the format of the `SafekeepersNotifyRequest` structure, provided below for convenience. + +``` +pub struct SafekeeperInfo { + pub id: NodeId, + pub hostname: String, +} + +pub struct SafekeepersNotifyRequest { + pub tenant_id: TenantId, + pub timeline_id: TimelineId, + pub generation: u32, + pub safekeepers: Vec<SafekeeperInfo>, +} +``` + +When a notification is received: + +1. Modify postgres configuration for this tenant: + + - set `neon.safekeeper_connstrings` to an array of postgres connection strings to safekeepers according to the `safekeepers` list. The + safekeepers identified by `NodeId` must be converted to the address+port of the respective safekeeper. + The hostname is provided for debugging purposes, so we reserve the right to change how it is passed. + - set `neon.safekeepers_generation` to the provided `generation` value. + +2. Send SIGHUP to postgres to reload configuration +3. Respond with 200 to the notification request. Do not return success if postgres was not updated: if an error is returned, the controller + will retry the notification until it succeeds. 
\ No newline at end of file diff --git a/test_runner/fixtures/compute_reconfigure.py b/test_runner/fixtures/compute_reconfigure.py index 425abef935..205b9141e0 100644 --- a/test_runner/fixtures/compute_reconfigure.py +++ b/test_runner/fixtures/compute_reconfigure.py @@ -19,7 +19,7 @@ if TYPE_CHECKING: class ComputeReconfigure: def __init__(self, server: HTTPServer): self.server = server - self.control_plane_compute_hook_api = f"http://{server.host}:{server.port}/notify-attach" + self.control_plane_hooks_api = f"http://{server.host}:{server.port}/" self.workloads: dict[TenantId, Any] = {} self.on_notify: Callable[[Any], None] | None = None diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 7bc746d668..11ca1d7913 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -460,7 +460,7 @@ class NeonEnvBuilder: self.overlay_mounts_created_by_us: list[tuple[str, Path]] = [] self.config_init_force: str | None = None self.top_output_dir = top_output_dir - self.control_plane_compute_hook_api: str | None = None + self.control_plane_hooks_api: str | None = None self.storage_controller_config: dict[Any, Any] | None = None # Flag to enable https listener in pageserver, generate local ssl certs, @@ -1116,7 +1116,7 @@ class NeonEnv: self.control_plane_api: str = self.storage_controller.upcall_api_endpoint() # For testing this with a fake HTTP server, enable passing through a URL from config - self.control_plane_compute_hook_api = config.control_plane_compute_hook_api + self.control_plane_hooks_api = config.control_plane_hooks_api self.pageserver_virtual_file_io_engine = config.pageserver_virtual_file_io_engine self.pageserver_virtual_file_io_mode = config.pageserver_virtual_file_io_mode @@ -1137,8 +1137,8 @@ class NeonEnv: if self.control_plane_api is not None: cfg["control_plane_api"] = self.control_plane_api - if self.control_plane_compute_hook_api is not None: - 
cfg["control_plane_compute_hook_api"] = self.control_plane_compute_hook_api + if self.control_plane_hooks_api is not None: + cfg["control_plane_hooks_api"] = self.control_plane_hooks_api storage_controller_config = self.storage_controller_config diff --git a/test_runner/performance/test_storage_controller_scale.py b/test_runner/performance/test_storage_controller_scale.py index 777b9e2870..e897d53cc8 100644 --- a/test_runner/performance/test_storage_controller_scale.py +++ b/test_runner/performance/test_storage_controller_scale.py @@ -83,9 +83,7 @@ def test_storage_controller_many_tenants( "max_offline": "30s", "max_warming_up": "300s", } - neon_env_builder.control_plane_compute_hook_api = ( - compute_reconfigure_listener.control_plane_compute_hook_api - ) + neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api AZS = ["alpha", "bravo", "charlie"] diff --git a/test_runner/regress/test_change_pageserver.py b/test_runner/regress/test_change_pageserver.py index 41aa5b47ca..5526b783d5 100644 --- a/test_runner/regress/test_change_pageserver.py +++ b/test_runner/regress/test_change_pageserver.py @@ -23,8 +23,8 @@ def test_change_pageserver(neon_env_builder: NeonEnvBuilder, make_httpserver): ) env = neon_env_builder.init_start() - neon_env_builder.control_plane_compute_hook_api = ( - f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach" + neon_env_builder.control_plane_hooks_api = ( + f"http://{make_httpserver.host}:{make_httpserver.port}/" ) def ignore_notify(request: Request): diff --git a/test_runner/regress/test_pageserver_secondary.py b/test_runner/regress/test_pageserver_secondary.py index 130db009c9..9f2aa5df8c 100644 --- a/test_runner/regress/test_pageserver_secondary.py +++ b/test_runner/regress/test_pageserver_secondary.py @@ -87,8 +87,8 @@ def test_location_conf_churn(neon_env_builder: NeonEnvBuilder, make_httpserver, neon_env_builder.enable_pageserver_remote_storage( remote_storage_kind=s3_storage(), ) - 
neon_env_builder.control_plane_compute_hook_api = ( - f"http://{make_httpserver.host}:{make_httpserver.port}/notify-attach" + neon_env_builder.control_plane_hooks_api = ( + f"http://{make_httpserver.host}:{make_httpserver.port}/" ) def ignore_notify(request: Request): diff --git a/test_runner/regress/test_sharding.py b/test_runner/regress/test_sharding.py index cb28f5b12d..b98ac8e50a 100644 --- a/test_runner/regress/test_sharding.py +++ b/test_runner/regress/test_sharding.py @@ -794,7 +794,7 @@ def test_sharding_split_stripe_size( Check that modifying stripe size inline with a shard split works as expected """ (host, port) = httpserver_listen_address - neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify" + neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}" neon_env_builder.num_pageservers = 1 # Set up fake HTTP notify endpoint: we will use this to validate that we receive @@ -806,7 +806,7 @@ def test_sharding_split_stripe_size( notifications.append(request.json) return Response(status=200) - httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler) + httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler) env = neon_env_builder.init_start( initial_tenant_shard_count=1, initial_tenant_shard_stripe_size=initial_stripe_size @@ -1312,9 +1312,7 @@ def test_sharding_split_failures( failure: Failure, ): neon_env_builder.num_pageservers = 4 - neon_env_builder.control_plane_compute_hook_api = ( - compute_reconfigure_listener.control_plane_compute_hook_api - ) + neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api initial_shard_count = 2 split_shard_count = 4 diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index 5eaf69cfa1..05eb4301b0 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -605,7 +605,7 @@ def 
test_storage_controller_compute_hook( # when migrating. neon_env_builder.num_pageservers = 2 (host, port) = httpserver_listen_address - neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify" + neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}" # Set up fake HTTP notify endpoint notifications = [] @@ -618,7 +618,7 @@ def test_storage_controller_compute_hook( notifications.append(request.json) return Response(status=status) - httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler) + httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler) # Start running env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) @@ -724,7 +724,7 @@ def test_storage_controller_stuck_compute_hook( neon_env_builder.num_pageservers = 2 (host, port) = httpserver_listen_address - neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify" + neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}" handle_params = {"status": 200} @@ -736,7 +736,7 @@ def test_storage_controller_stuck_compute_hook( notifications.append(request.json) return Response(status=status) - httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler) + httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler) # Start running env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) @@ -871,7 +871,7 @@ def test_storage_controller_compute_hook_retry( neon_env_builder.num_pageservers = 2 (host, port) = httpserver_listen_address - neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify" + neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}" handle_params = {"status": 200} @@ -883,7 +883,7 @@ def test_storage_controller_compute_hook_retry( notifications.append(request.json) return Response(status=status) - httpserver.expect_request("/notify", 
method="PUT").respond_with_handler(handler) + httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler) # Start running env = neon_env_builder.init_configs() @@ -993,7 +993,7 @@ def test_storage_controller_compute_hook_revert( # when migrating. neon_env_builder.num_pageservers = 2 (host, port) = httpserver_listen_address - neon_env_builder.control_plane_compute_hook_api = f"http://{host}:{port}/notify" + neon_env_builder.control_plane_hooks_api = f"http://{host}:{port}" # Set up fake HTTP notify endpoint notifications = [] @@ -1006,7 +1006,7 @@ def test_storage_controller_compute_hook_revert( notifications.append(request.json) return Response(status=status) - httpserver.expect_request("/notify", method="PUT").respond_with_handler(handler) + httpserver.expect_request("/notify-attach", method="PUT").respond_with_handler(handler) # Start running env = neon_env_builder.init_start(initial_tenant_conf={"lsn_lease_length": "0s"}) @@ -1395,9 +1395,7 @@ def test_storage_controller_tenant_deletion( """ neon_env_builder.num_pageservers = 4 neon_env_builder.enable_pageserver_remote_storage(s3_storage()) - neon_env_builder.control_plane_compute_hook_api = ( - compute_reconfigure_listener.control_plane_compute_hook_api - ) + neon_env_builder.control_plane_hooks_api = compute_reconfigure_listener.control_plane_hooks_api env = neon_env_builder.init_configs() env.start() From 23b713900ef97251967691757011e4a793814188 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Thu, 13 Mar 2025 16:21:23 -0400 Subject: [PATCH 186/207] feat(storcon): passthrough ancestor detach behavior (#11199) ## Problem https://github.com/neondatabase/neon/issues/10310 https://github.com/neondatabase/neon/pull/11158 ## Summary of changes We need to passthrough the new detach behavior through the storcon API. 
Signed-off-by: Alex Chi Z --- libs/pageserver_api/src/models.rs | 33 +++++++++++++++++++ pageserver/client/src/mgmt_api.rs | 12 +++++-- pageserver/src/http/routes.rs | 8 ++--- pageserver/src/tenant/mgr.rs | 4 +-- pageserver/src/tenant/timeline.rs | 9 ++--- .../src/tenant/timeline/detach_ancestor.rs | 25 +------------- storage_controller/src/http.rs | 5 +-- storage_controller/src/pageserver_client.rs | 11 ++++--- storage_controller/src/service.rs | 11 ++++--- test_runner/fixtures/pageserver/http.py | 6 ++-- .../regress/test_timeline_detach_ancestor.py | 12 +++++-- 11 files changed, 84 insertions(+), 52 deletions(-) diff --git a/libs/pageserver_api/src/models.rs b/libs/pageserver_api/src/models.rs index 5e5bcf5338..4a8f75413c 100644 --- a/libs/pageserver_api/src/models.rs +++ b/libs/pageserver_api/src/models.rs @@ -176,6 +176,39 @@ impl LsnLease { } } +/// Controls the detach ancestor behavior. +/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point. +/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all. 
+#[derive(Debug, Clone, Copy, Default)] +pub enum DetachBehavior { + #[default] + NoAncestorAndReparent, + MultiLevelAndNoReparent, +} + +impl std::str::FromStr for DetachBehavior { + type Err = &'static str; + + fn from_str(s: &str) -> Result { + match s { + "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent), + "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent), + "v1" => Ok(DetachBehavior::NoAncestorAndReparent), + "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent), + _ => Err("cannot parse detach behavior"), + } + } +} + +impl std::fmt::Display for DetachBehavior { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + DetachBehavior::NoAncestorAndReparent => write!(f, "no_ancestor_and_reparent"), + DetachBehavior::MultiLevelAndNoReparent => write!(f, "multi_level_and_no_reparent"), + } + } +} + /// The only [`TenantState`] variants we could be `TenantState::Activating` from. /// /// XXX: We used to have more variants here, but now it's just one, which makes this rather diff --git a/pageserver/client/src/mgmt_api.rs b/pageserver/client/src/mgmt_api.rs index 830fd8a531..508dac231e 100644 --- a/pageserver/client/src/mgmt_api.rs +++ b/pageserver/client/src/mgmt_api.rs @@ -7,7 +7,7 @@ use http_utils::error::HttpErrorBody; use pageserver_api::models::*; use pageserver_api::shard::TenantShardId; pub use reqwest::Body as ReqwestBody; -use reqwest::{Certificate, IntoUrl, Method, StatusCode}; +use reqwest::{Certificate, IntoUrl, Method, StatusCode, Url}; use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; @@ -458,13 +458,21 @@ impl Client { &self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, + behavior: Option, ) -> Result { let uri = format!( "{}/v1/tenant/{tenant_shard_id}/timeline/{timeline_id}/detach_ancestor", self.mgmt_api_endpoint ); + let mut uri = Url::parse(&uri) + .map_err(|e| Error::ApiError(StatusCode::INTERNAL_SERVER_ERROR, format!("{e}")))?; - 
self.request(Method::PUT, &uri, ()) + if let Some(behavior) = behavior { + uri.query_pairs_mut() + .append_pair("detach_behavior", &behavior.to_string()); + } + + self.request(Method::PUT, uri, ()) .await? .json() .await diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 70c3cc8522..e8a32ca1ef 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -28,9 +28,9 @@ use hyper::{Body, Request, Response, StatusCode, Uri, header}; use metrics::launch_timestamp::LaunchTimestamp; use pageserver_api::models::virtual_file::IoMode; use pageserver_api::models::{ - DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, ListAuxFilesRequest, - LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, LsnLeaseRequest, - OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse, + DetachBehavior, DownloadRemoteLayersTaskSpawnRequest, IngestAuxFilesRequest, + ListAuxFilesRequest, LocationConfig, LocationConfigListResponse, LocationConfigMode, LsnLease, + LsnLeaseRequest, OffloadedTimelineInfo, PageTraceEvent, ShardParameters, StatusResponse, TenantConfigPatchRequest, TenantConfigRequest, TenantDetails, TenantInfo, TenantLocationConfigRequest, TenantLocationConfigResponse, TenantScanRemoteStorageResponse, TenantScanRemoteStorageShard, TenantShardLocation, TenantShardSplitRequest, @@ -72,7 +72,6 @@ use crate::tenant::remote_timeline_client::{ use crate::tenant::secondary::SecondaryController; use crate::tenant::size::ModelInputs; use crate::tenant::storage_layer::{IoConcurrency, LayerAccessStatsReset, LayerName}; -use crate::tenant::timeline::detach_ancestor::DetachBehavior; use crate::tenant::timeline::offload::{OffloadError, offload_timeline}; use crate::tenant::timeline::{ CompactFlags, CompactOptions, CompactRequest, CompactionError, Timeline, WaitLsnTimeout, @@ -2508,6 +2507,7 @@ async fn timeline_detach_ancestor_handler( check_permission(&request, Some(tenant_shard_id.tenant_id))?; let 
timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?; let behavior: Option = parse_query_param(&request, "detach_behavior")?; + let behavior = behavior.unwrap_or_default(); let span = tracing::info_span!("detach_ancestor", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), %timeline_id); diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 092bfdf6c1..f02247950f 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -14,7 +14,7 @@ use futures::StreamExt; use itertools::Itertools; use once_cell::sync::Lazy; use pageserver_api::key::Key; -use pageserver_api::models::LocationConfigMode; +use pageserver_api::models::{DetachBehavior, LocationConfigMode}; use pageserver_api::shard::{ ShardCount, ShardIdentity, ShardIndex, ShardNumber, ShardStripeSize, TenantShardId, }; @@ -1914,7 +1914,7 @@ impl TenantManager { tenant_shard_id: TenantShardId, timeline_id: TimelineId, prepared: PreparedTimelineDetach, - behavior: detach_ancestor::DetachBehavior, + behavior: DetachBehavior, mut attempt: detach_ancestor::Attempt, ctx: &RequestContext, ) -> Result, detach_ancestor::Error> { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index be861a0c89..face2dfdc1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -45,8 +45,9 @@ use pageserver_api::key::{ use pageserver_api::keyspace::{KeySpaceAccum, KeySpaceRandomAccum, SparseKeyPartitioning}; use pageserver_api::models::{ CompactKeyRange, CompactLsnRange, CompactionAlgorithm, CompactionAlgorithmSettings, - DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, EvictionPolicy, - InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, TimelineState, + DetachBehavior, DownloadRemoteLayersTaskInfo, DownloadRemoteLayersTaskSpawnRequest, + EvictionPolicy, InMemoryLayerInfo, LayerMapInfo, LsnLease, PageTraceEvent, RelSizeMigration, + 
TimelineState, }; use pageserver_api::reltag::{BlockNumber, RelTag}; use pageserver_api::shard::{ShardIdentity, ShardIndex, ShardNumber, TenantShardId}; @@ -5445,7 +5446,7 @@ impl Timeline { self: &Arc, tenant: &crate::tenant::Tenant, options: detach_ancestor::Options, - behavior: detach_ancestor::DetachBehavior, + behavior: DetachBehavior, ctx: &RequestContext, ) -> Result { detach_ancestor::prepare(self, tenant, behavior, options, ctx).await @@ -5466,7 +5467,7 @@ impl Timeline { prepared: detach_ancestor::PreparedTimelineDetach, ancestor_timeline_id: TimelineId, ancestor_lsn: Lsn, - behavior: detach_ancestor::DetachBehavior, + behavior: DetachBehavior, ctx: &RequestContext, ) -> Result { detach_ancestor::detach_and_reparent( diff --git a/pageserver/src/tenant/timeline/detach_ancestor.rs b/pageserver/src/tenant/timeline/detach_ancestor.rs index c3e4bedc50..ac9d9a4579 100644 --- a/pageserver/src/tenant/timeline/detach_ancestor.rs +++ b/pageserver/src/tenant/timeline/detach_ancestor.rs @@ -3,6 +3,7 @@ use std::sync::Arc; use anyhow::Context; use http_utils::error::ApiError; +use pageserver_api::models::DetachBehavior; use pageserver_api::models::detach_ancestor::AncestorDetached; use pageserver_api::shard::ShardIdentity; use tokio::sync::Semaphore; @@ -139,30 +140,6 @@ pub(crate) struct Options { pub(crate) copy_concurrency: std::num::NonZeroUsize, } -/// Controls the detach ancestor behavior. -/// - When set to `NoAncestorAndReparent`, we will only detach a branch if its ancestor is a root branch. It will automatically reparent any children of the ancestor before and at the branch point. -/// - When set to `MultiLevelAndNoReparent`, we will detach a branch from multiple levels of ancestors, and no reparenting will happen at all. 
-#[derive(Debug, Clone, Copy, Default)] -pub enum DetachBehavior { - #[default] - NoAncestorAndReparent, - MultiLevelAndNoReparent, -} - -impl std::str::FromStr for DetachBehavior { - type Err = &'static str; - - fn from_str(s: &str) -> Result { - match s { - "no_ancestor_and_reparent" => Ok(DetachBehavior::NoAncestorAndReparent), - "multi_level_and_no_reparent" => Ok(DetachBehavior::MultiLevelAndNoReparent), - "v1" => Ok(DetachBehavior::NoAncestorAndReparent), - "v2" => Ok(DetachBehavior::MultiLevelAndNoReparent), - _ => Err("cannot parse detach behavior"), - } - } -} - impl Default for Options { fn default() -> Self { Self { diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index b27804d820..52e3ef5b0a 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -24,7 +24,7 @@ use pageserver_api::controller_api::{ ShardsPreferredAzsRequest, TenantCreateRequest, TenantPolicyRequest, TenantShardMigrateRequest, }; use pageserver_api::models::{ - TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, + DetachBehavior, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest, TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, }; @@ -525,6 +525,7 @@ async fn handle_tenant_timeline_detach_ancestor( ) -> Result, ApiError> { let tenant_id: TenantId = parse_request_param(&req, "tenant_id")?; let timeline_id: TimelineId = parse_request_param(&req, "timeline_id")?; + let behavior: Option = parse_query_param(&req, "detach_behavior")?; check_permissions(&req, Scope::PageServerApi)?; maybe_rate_limit(&req, tenant_id).await; @@ -537,7 +538,7 @@ async fn handle_tenant_timeline_detach_ancestor( }; let res = service - .tenant_timeline_detach_ancestor(tenant_id, timeline_id) + .tenant_timeline_detach_ancestor(tenant_id, timeline_id, behavior) .await?; json_response(StatusCode::OK, res) diff --git a/storage_controller/src/pageserver_client.rs 
b/storage_controller/src/pageserver_client.rs index 7fd4f37e7e..05e7aa88c6 100644 --- a/storage_controller/src/pageserver_client.rs +++ b/storage_controller/src/pageserver_client.rs @@ -1,9 +1,9 @@ use pageserver_api::models::detach_ancestor::AncestorDetached; use pageserver_api::models::{ - LocationConfig, LocationConfigListResponse, PageserverUtilization, SecondaryProgress, - TenantScanRemoteStorageResponse, TenantShardSplitRequest, TenantShardSplitResponse, - TenantWaitLsnRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineInfo, - TopTenantShardsRequest, TopTenantShardsResponse, + DetachBehavior, LocationConfig, LocationConfigListResponse, PageserverUtilization, + SecondaryProgress, TenantScanRemoteStorageResponse, TenantShardSplitRequest, + TenantShardSplitResponse, TenantWaitLsnRequest, TimelineArchivalConfigRequest, + TimelineCreateRequest, TimelineInfo, TopTenantShardsRequest, TopTenantShardsResponse, }; use pageserver_api::shard::TenantShardId; use pageserver_client::BlockUnblock; @@ -252,13 +252,14 @@ impl PageserverClient { &self, tenant_shard_id: TenantShardId, timeline_id: TimelineId, + behavior: Option, ) -> Result { measured_request!( "timeline_detach_ancestor", crate::metrics::Method::Put, &self.node_id_label, self.inner - .timeline_detach_ancestor(tenant_shard_id, timeline_id) + .timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior) .await ) } diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 445b174b96..789f4da255 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -34,9 +34,9 @@ use pageserver_api::controller_api::{ TenantShardMigrateResponse, }; use pageserver_api::models::{ - self, LocationConfig, LocationConfigListResponse, LocationConfigMode, PageserverUtilization, - SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters, TenantConfig, - TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, + self, 
DetachBehavior, LocationConfig, LocationConfigListResponse, LocationConfigMode, + PageserverUtilization, SafekeeperInfo, SafekeepersInfo, SecondaryProgress, ShardParameters, + TenantConfig, TenantConfigPatchRequest, TenantConfigRequest, TenantLocationConfigRequest, TenantLocationConfigResponse, TenantShardLocation, TenantShardSplitRequest, TenantShardSplitResponse, TenantSorting, TenantTimeTravelRequest, TimelineArchivalConfigRequest, TimelineCreateRequest, TimelineCreateResponseStorcon, @@ -4041,6 +4041,7 @@ impl Service { &self, tenant_id: TenantId, timeline_id: TimelineId, + behavior: Option, ) -> Result { tracing::info!("Detaching timeline {tenant_id}/{timeline_id}",); @@ -4064,6 +4065,7 @@ impl Service { node: Node, jwt: Option, ssl_ca_cert: Option, + behavior: Option, ) -> Result<(ShardNumber, models::detach_ancestor::AncestorDetached), ApiError> { tracing::info!( "Detaching timeline on shard {tenant_shard_id}/{timeline_id}, attached to node {node}", @@ -4073,7 +4075,7 @@ impl Service { .map_err(|e| passthrough_api_error(&node, e))?; client - .timeline_detach_ancestor(tenant_shard_id, timeline_id) + .timeline_detach_ancestor(tenant_shard_id, timeline_id, behavior) .await .map_err(|e| { use mgmt_api::Error; @@ -4111,6 +4113,7 @@ impl Service { node, self.config.pageserver_jwt_token.clone(), self.config.ssl_ca_cert.clone(), + behavior, )) }) .await?; diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 61aab2213d..13cab448f3 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -1070,14 +1070,14 @@ class PageserverHttpClient(requests.Session, MetricsGetter): tenant_id: TenantId | TenantShardId, timeline_id: TimelineId, batch_size: int | None = None, - behavior_v2: bool = False, + detach_behavior: str | None = None, **kwargs, ) -> set[TimelineId]: params: dict[str, Any] = {} if batch_size is not None: params["batch_size"] = batch_size - if behavior_v2: - 
params["detach_behavior"] = "v2" + if detach_behavior: + params["detach_behavior"] = detach_behavior res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/detach_ancestor", params=params, diff --git a/test_runner/regress/test_timeline_detach_ancestor.py b/test_runner/regress/test_timeline_detach_ancestor.py index 79537ba83a..685a32af90 100644 --- a/test_runner/regress/test_timeline_detach_ancestor.py +++ b/test_runner/regress/test_timeline_detach_ancestor.py @@ -407,7 +407,9 @@ def test_ancestor_detach_behavior_v2(neon_env_builder: NeonEnvBuilder): after = env.create_branch("after", ancestor_branch_name="main", ancestor_start_lsn=None) - all_reparented = client.detach_ancestor(env.initial_tenant, branch_to_detach, behavior_v2=True) + all_reparented = client.detach_ancestor( + env.initial_tenant, branch_to_detach, detach_behavior="v2" + ) assert set(all_reparented) == set() env.pageserver.quiesce_tenants() @@ -1350,8 +1352,10 @@ def test_sharded_tad_interleaved_after_partial_success(neon_env_builder: NeonEnv ) +@pytest.mark.parametrize("detach_behavior", ["default", "v1", "v2"]) def test_retryable_500_hit_through_storcon_during_timeline_detach_ancestor( neon_env_builder: NeonEnvBuilder, + detach_behavior: str, ): shard_count = 2 neon_env_builder.num_pageservers = shard_count @@ -1390,7 +1394,11 @@ def test_retryable_500_hit_through_storcon_during_timeline_detach_ancestor( victim_http.configure_failpoints([(pausepoint, "pause"), (failpoint, "return")]) def detach_timeline(): - http.detach_ancestor(env.initial_tenant, detached_branch) + http.detach_ancestor( + env.initial_tenant, + detached_branch, + detach_behavior=detach_behavior if detach_behavior != "default" else None, + ) def paused_at_failpoint(): stuck.assert_log_contains(f"at failpoint {pausepoint}") From 9a3020d2ce038d61ae2b180a6ab698cf2232cb87 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Thu, 13 Mar 2025 20:23:53 +0000 Subject: [PATCH 187/207] chore(proxy): 
pre-initialise metricvecs (#11226) ## Problem We noticed that error metrics didn't show for some services with light load. This is not great and can cause problems for dashboards/alerts. ## Summary of changes Pre-initialise some metricvecs. --- proxy/src/metrics.rs | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/proxy/src/metrics.rs b/proxy/src/metrics.rs index 29834760c0..e5fc0b724b 100644 --- a/proxy/src/metrics.rs +++ b/proxy/src/metrics.rs @@ -30,7 +30,16 @@ pub struct Metrics { static SELF: OnceLock = OnceLock::new(); impl Metrics { pub fn install(thread_pool: Arc) { - SELF.set(Metrics::new(thread_pool)) + let mut metrics = Metrics::new(thread_pool); + + metrics.proxy.errors_total.init_all_dense(); + metrics.proxy.redis_errors_total.init_all_dense(); + metrics.proxy.redis_events_count.init_all_dense(); + metrics.proxy.retries_metric.init_all_dense(); + metrics.proxy.invalid_endpoints_total.init_all_dense(); + metrics.proxy.connection_failures_total.init_all_dense(); + + SELF.set(metrics) .ok() .expect("proxy metrics must not be installed more than once"); } From 4ff000c042b5ad63cd8d9ca209ebb877f61ec6fe Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 21:46:21 +0100 Subject: [PATCH 188/207] pageserver: deflake `test_metadata_image_creation` (#11230) ## Problem `test_metadata_image_creation` became flaky with #11212, since image compaction may yield to L0 compaction. ## Summary of changes Set `NoYield` when compacting in tenant tests.
--- pageserver/src/tenant.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 2bce56345a..7a06d60268 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -7020,6 +7020,7 @@ mod tests { child_timeline.freeze_and_flush().await?; let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::NoYield); child_timeline .compact(&CancellationToken::new(), flags, &ctx) .await?; @@ -7727,6 +7728,7 @@ mod tests { let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceImageLayerCreation); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::NoYield); flags } else { EnumSet::empty() @@ -7893,7 +7895,6 @@ mod tests { Ok((res, reconstruct_state.get_delta_layers_visited() as usize)) } - #[allow(clippy::needless_range_loop)] for blknum in 0..NUM_KEYS { lsn = Lsn(lsn.0 + 0x10); test_key.field6 = (blknum * STEP) as u32; @@ -7943,6 +7944,7 @@ mod tests { let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceImageLayerCreation); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::NoYield); flags }, &ctx, @@ -8405,6 +8407,7 @@ mod tests { let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceImageLayerCreation); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::NoYield); flags }, &ctx, @@ -8472,6 +8475,7 @@ mod tests { let mut flags = EnumSet::new(); flags.insert(CompactFlags::ForceImageLayerCreation); flags.insert(CompactFlags::ForceRepartition); + flags.insert(CompactFlags::NoYield); flags }, &ctx, From d6d78a050f4d3b807718ba2a1f2fbc0d779b5cc8 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Thu, 13 Mar 2025 22:08:28 +0100 Subject: [PATCH 189/207] pageserver: disable `l0_flush_wait_upload` by default (#11215) ## Problem This is already disabled in production, as it is replaced by L0 flush delays. 
It will be removed in a later PR, once the config option is no longer specified in production. ## Summary of changes Disable `l0_flush_wait_upload` by default. --- libs/pageserver_api/src/config.rs | 4 +- .../regress/test_attach_tenant_config.py | 2 +- test_runner/regress/test_branching.py | 7 +-- test_runner/regress/test_remote_storage.py | 55 +------------------ 4 files changed, 8 insertions(+), 60 deletions(-) diff --git a/libs/pageserver_api/src/config.rs b/libs/pageserver_api/src/config.rs index e112a57c9d..b12ef65780 100644 --- a/libs/pageserver_api/src/config.rs +++ b/libs/pageserver_api/src/config.rs @@ -289,6 +289,8 @@ pub struct TenantConfigToml { /// If true, Level0 delta layer flushes will wait for S3 upload before flushing the next /// layer. This is a temporary backpressure mechanism which should be removed once /// l0_flush_{delay,stall}_threshold is fully enabled. + /// + /// TODO: this is no longer enabled, remove it when the config option is no longer set. pub l0_flush_wait_upload: bool, // Determines how much history is retained, to allow // branching and read replicas at an older point in time. 
@@ -576,7 +578,7 @@ pub mod tenant_conf_defaults { pub const DEFAULT_COMPACTION_ALGORITHM: crate::models::CompactionAlgorithm = crate::models::CompactionAlgorithm::Legacy; - pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = true; + pub const DEFAULT_L0_FLUSH_WAIT_UPLOAD: bool = false; pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024; diff --git a/test_runner/regress/test_attach_tenant_config.py b/test_runner/regress/test_attach_tenant_config.py index 07600dd911..b56fcd3500 100644 --- a/test_runner/regress/test_attach_tenant_config.py +++ b/test_runner/regress/test_attach_tenant_config.py @@ -144,7 +144,7 @@ def test_fully_custom_config(positive_env: NeonEnv): "compaction_l0_semaphore": False, "l0_flush_delay_threshold": 25, "l0_flush_stall_threshold": 42, - "l0_flush_wait_upload": False, + "l0_flush_wait_upload": True, "compaction_target_size": 1048576, "checkpoint_distance": 10000, "checkpoint_timeout": "13m", diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 34e4e994cb..85d0cfbf1d 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -19,6 +19,7 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException +from requests.exceptions import RetryError # Test branch creation @@ -180,7 +181,6 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.endpoints.create_start( initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 ) - ps_http.configure_failpoints(("before-upload-index-pausable", "off")) finally: env.pageserver.stop(immediate=True) @@ -221,10 +221,7 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises( - PageserverApiException, - match="Cannot branch off the timeline that's not present in 
pageserver", - ): + with pytest.raises(RetryError, match="too many 503 error responses"): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index c39c74fa2a..e8721f1ea0 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -29,7 +29,6 @@ from fixtures.remote_storage import ( from fixtures.utils import ( assert_eq, assert_ge, - assert_gt, print_gc_result, query_scalar, wait_until, @@ -334,14 +333,12 @@ def test_remote_storage_upload_queue_retries( # Exponential back-off in upload queue, so, gracious timeouts. wait_until( - lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="upload"), 0), timeout=30 + lambda: assert_ge(get_queued_count(file_kind="layer", op_kind="upload"), 1), timeout=30 ) wait_until( lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 1), timeout=30 ) - wait_until( - lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0), timeout=30 - ) + # There may or may not be deletes queued up behind conflicting uploads; don't check. # unblock churn operations configure_storage_sync_failpoints("off") @@ -786,54 +783,6 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() -def test_paused_upload_stalls_checkpoint( - neon_env_builder: NeonEnvBuilder, -): - """ - This test checks that checkpoints block on uploads to remote storage. 
- """ - neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) - - env = neon_env_builder.init_start( - initial_tenant_conf={ - # Set a small compaction threshold - "compaction_threshold": "3", - # Disable GC - "gc_period": "0s", - # disable PITR - "pitr_interval": "0s", - } - ) - - env.pageserver.allowed_errors.append( - f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" - ) - - tenant_id = env.initial_tenant - timeline_id = env.initial_timeline - - client = env.pageserver.http_client() - layers_at_creation = client.layer_map_info(tenant_id, timeline_id) - deltas_at_creation = len(layers_at_creation.delta_layers()) - assert ( - deltas_at_creation == 1 - ), "are you fixing #5863? make sure we end up with 2 deltas at the end of endpoint lifecycle" - - # Make new layer uploads get stuck. - # Note that timeline creation waits for the initial layers to reach remote storage. - # So at this point, the `layers_at_creation` are in remote storage. - client.configure_failpoints(("before-upload-layer-pausable", "pause")) - - with env.endpoints.create_start("main", tenant_id=tenant_id) as endpoint: - # Build two tables with some data inside - endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)") - wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) - - with pytest.raises(ReadTimeout): - client.timeline_checkpoint(tenant_id, timeline_id, timeout=5) - client.configure_failpoints(("before-upload-layer-pausable", "off")) - - def wait_upload_queue_empty( client: PageserverHttpClient, tenant_id: TenantId, timeline_id: TimelineId ): From 5359cf717c557061652efaa06e14ecf111fe5071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 14 Mar 2025 01:00:37 +0100 Subject: [PATCH 190/207] storcon: add API definitions for exclude_timeline and term_bump (#11197) Adds API definitions for the safekeeper API endpoints `exclude_timeline` and `term_bump`. 
Also does a bugfix to return the correct type from `delete_timeline`. Part of #8614 --- libs/safekeeper_api/src/models.rs | 5 +++ safekeeper/client/src/mgmt_api.rs | 40 +++++++++++++++++++-- safekeeper/src/http/routes.rs | 5 +-- safekeeper/src/timelines_global_map.rs | 8 +---- storage_controller/src/safekeeper_client.rs | 38 ++++++++++++++++++-- 5 files changed, 83 insertions(+), 13 deletions(-) diff --git a/libs/safekeeper_api/src/models.rs b/libs/safekeeper_api/src/models.rs index 10c703395f..6bdc651668 100644 --- a/libs/safekeeper_api/src/models.rs +++ b/libs/safekeeper_api/src/models.rs @@ -221,6 +221,11 @@ pub struct TimelineMembershipSwitchResponse { pub current_conf: Configuration, } +#[derive(Clone, Copy, Serialize, Deserialize)] +pub struct TimelineDeleteResult { + pub dir_existed: bool, +} + fn lsn_invalid() -> Lsn { Lsn::INVALID } diff --git a/safekeeper/client/src/mgmt_api.rs b/safekeeper/client/src/mgmt_api.rs index 3966aa811f..7ae39ef95e 100644 --- a/safekeeper/client/src/mgmt_api.rs +++ b/safekeeper/client/src/mgmt_api.rs @@ -8,7 +8,7 @@ use std::error::Error as _; use http_utils::error::HttpErrorBody; use reqwest::{IntoUrl, Method, StatusCode}; use safekeeper_api::models::{ - PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, + self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, TimelineStatus, }; use utils::id::{NodeId, TenantId, TimelineId}; @@ -96,11 +96,25 @@ impl Client { resp.json().await.map_err(Error::ReceiveBody) } + pub async fn exclude_timeline( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + req: &models::TimelineMembershipSwitchRequest, + ) -> Result { + let uri = format!( + "{}/v1/tenant/{}/timeline/{}/exclude", + self.mgmt_api_endpoint, tenant_id, timeline_id + ); + let resp = self.put(&uri, req).await?; + resp.json().await.map_err(Error::ReceiveBody) + } + pub async fn delete_timeline( &self, tenant_id: TenantId, timeline_id: TimelineId, - ) 
-> Result { + ) -> Result { let uri = format!( "{}/v1/tenant/{}/timeline/{}", self.mgmt_api_endpoint, tenant_id, timeline_id @@ -109,6 +123,20 @@ impl Client { resp.json().await.map_err(Error::ReceiveBody) } + pub async fn bump_timeline_term( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + req: &models::TimelineTermBumpRequest, + ) -> Result { + let uri = format!( + "{}/v1/tenant/{}/timeline/{}/term_bump", + self.mgmt_api_endpoint, tenant_id, timeline_id + ); + let resp = self.post(&uri, req).await?; + resp.json().await.map_err(Error::ReceiveBody) + } + pub async fn timeline_status( &self, tenant_id: TenantId, @@ -149,6 +177,14 @@ impl Client { self.request(Method::POST, uri, body).await } + async fn put( + &self, + uri: U, + body: B, + ) -> Result { + self.request(Method::PUT, uri, body).await + } + async fn get(&self, uri: U) -> Result { self.request(Method::GET, uri, ()).await } diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 4f47331c85..21293671e1 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -17,7 +17,8 @@ use hyper::{Body, Request, Response, StatusCode}; use postgres_ffi::WAL_SEGMENT_SIZE; use safekeeper_api::models::{ AcceptorStateStatus, PullTimelineRequest, SafekeeperStatus, SkTimelineInfo, TermSwitchApiEntry, - TimelineCopyRequest, TimelineCreateRequest, TimelineStatus, TimelineTermBumpRequest, + TimelineCopyRequest, TimelineCreateRequest, TimelineDeleteResult, TimelineStatus, + TimelineTermBumpRequest, }; use safekeeper_api::{ServerInfo, membership, models}; use storage_broker::proto::{SafekeeperTimelineInfo, TenantTimelineId as ProtoTenantTimelineId}; @@ -32,7 +33,7 @@ use utils::lsn::Lsn; use crate::debug_dump::TimelineDigestRequest; use crate::safekeeper::TermLsn; -use crate::timelines_global_map::{DeleteOrExclude, TimelineDeleteResult}; +use crate::timelines_global_map::DeleteOrExclude; use crate::{ GlobalTimelines, SafeKeeperConf, copy_timeline, debug_dump, 
patch_control_file, pull_timeline, }; diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index 858dfce807..41abee369e 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -11,9 +11,8 @@ use anyhow::{Context, Result, bail}; use camino::Utf8PathBuf; use camino_tempfile::Utf8TempDir; use safekeeper_api::membership::Configuration; -use safekeeper_api::models::SafekeeperUtilization; +use safekeeper_api::models::{SafekeeperUtilization, TimelineDeleteResult}; use safekeeper_api::{ServerInfo, membership}; -use serde::Serialize; use tokio::fs; use tracing::*; use utils::crashsafe::{durable_rename, fsync_async_opt}; @@ -579,11 +578,6 @@ impl GlobalTimelines { } } -#[derive(Clone, Copy, Serialize)] -pub struct TimelineDeleteResult { - pub dir_existed: bool, -} - /// Action for delete_or_exclude. #[derive(Clone, Debug)] pub enum DeleteOrExclude { diff --git a/storage_controller/src/safekeeper_client.rs b/storage_controller/src/safekeeper_client.rs index 1533b6c086..a44fcc27d2 100644 --- a/storage_controller/src/safekeeper_client.rs +++ b/storage_controller/src/safekeeper_client.rs @@ -1,5 +1,5 @@ use safekeeper_api::models::{ - PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, + self, PullTimelineRequest, PullTimelineResponse, SafekeeperUtilization, TimelineCreateRequest, TimelineStatus, }; use safekeeper_client::mgmt_api::{Client, Result}; @@ -69,11 +69,28 @@ impl SafekeeperClient { ) } + #[allow(unused)] + pub(crate) async fn exclude_timeline( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + req: &models::TimelineMembershipSwitchRequest, + ) -> Result { + measured_request!( + "exclude_timeline", + crate::metrics::Method::Post, + &self.node_id_label, + self.inner + .exclude_timeline(tenant_id, timeline_id, req) + .await + ) + } + pub(crate) async fn delete_timeline( &self, tenant_id: TenantId, timeline_id: TimelineId, - ) -> Result { + ) 
-> Result { measured_request!( "delete_timeline", crate::metrics::Method::Delete, @@ -94,6 +111,23 @@ impl SafekeeperClient { ) } + #[allow(unused)] + pub(crate) async fn bump_timeline_term( + &self, + tenant_id: TenantId, + timeline_id: TimelineId, + req: &models::TimelineTermBumpRequest, + ) -> Result { + measured_request!( + "term_bump", + crate::metrics::Method::Post, + &self.node_id_label, + self.inner + .bump_timeline_term(tenant_id, timeline_id, req) + .await + ) + } + pub(crate) async fn get_utilization(&self) -> Result { measured_request!( "utilization", From 04370b48b30a2ba63a2e17fbc0405ec9e403ff50 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Fri, 14 Mar 2025 12:21:16 +0100 Subject: [PATCH 191/207] fix(storcon): optimization validation makes decisions based on wrong SecondaryProgress (#11229) # Refs - fixes https://github.com/neondatabase/neon/issues/11228 # Problem High-Level When storcon validates whether a `ScheduleOptimizationAction` should be applied, it retrieves the `tenant_secondary_status` to determine whether a secondary is ready for the optimization. When collecting results, it associates secondary statuses with the wrong optimization actions in the batch of optimizations that we're validating. The result is that we make the decision for shard/location X based on the SecondaryStatus of a random secondary location Y in the current batch of optimizations. A possible symptom is an early cutover, as seen in this engineering investigation here: - https://github.com/neondatabase/cloud/issues/25734 # Problem Code-Level This code here in `optimize_all_validate` https://github.com/neondatabase/neon/blob/97e2e27f682003bcc8ac1c9e625bc3675f394264/storage_controller/src/service.rs#L7012-L7029 zips the `want_secondary_status` with the Vec returned from `tenant_for_shards_api` . However, the Vec returned from `want_secondary_status` is not ordered (it uses FuturesUnordered internally). # Solution Sort the Vec in input order before returning it. 
`optimize_all_validate` was the only caller affected by this problem. While at it, also future-proof similar-looking function `tenant_for_shards`. None of its callers care about the order, but this type of function signature is easy to use incorrectly. # Future Work Avoid the additional iteration, map, and allocation. Change API to leverage AsyncFn (async closure). And/or invert `tenant_for_shards_api` into a Future ext trait / iterator adaptor thing. --- storage_controller/src/service.rs | 52 ++++++++++++++++++------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 789f4da255..f33408a89b 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -4268,7 +4268,8 @@ impl Service { /// Helper for concurrently calling a pageserver API on a number of shards, such as timeline creation. /// - /// On success, the returned vector contains exactly the same number of elements as the input `locations`. + /// On success, the returned vector contains exactly the same number of elements as the input `locations` + /// and returned element at index `i` is the result for `req_fn(locations[i])`.
async fn tenant_for_shards( &self, locations: Vec<(TenantShardId, Node)>, @@ -4284,18 +4285,23 @@ impl Service { let mut futs = FuturesUnordered::new(); let mut results = Vec::with_capacity(locations.len()); - for (tenant_shard_id, node) in locations { - futs.push(req_fn(tenant_shard_id, node)); + for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() { + let fut = req_fn(tenant_shard_id, node); + futs.push(async move { (idx, fut.await) }); } - while let Some(r) = futs.next().await { - results.push(r?); + while let Some((idx, r)) = futs.next().await { + results.push((idx, r?)); } - Ok(results) + results.sort_by_key(|(idx, _)| *idx); + Ok(results.into_iter().map(|(_, r)| r).collect()) } - /// Concurrently invoke a pageserver API call on many shards at once + /// Concurrently invoke a pageserver API call on many shards at once. + /// + /// The returned Vec has the same length as the `locations` Vec, + /// and returned element at index `i` is the result for `op(locations[i])`. 
pub(crate) async fn tenant_for_shards_api( &self, locations: Vec<(TenantShardId, Node)>, @@ -4312,27 +4318,29 @@ impl Service { let mut futs = FuturesUnordered::new(); let mut results = Vec::with_capacity(locations.len()); - for (tenant_shard_id, node) in locations { + for (idx, (tenant_shard_id, node)) in locations.into_iter().enumerate() { futs.push(async move { - node.with_client_retries( - |client| op(tenant_shard_id, client), - &self.config.pageserver_jwt_token, - &self.config.ssl_ca_cert, - warn_threshold, - max_retries, - timeout, - cancel, - ) - .await + let r = node + .with_client_retries( + |client| op(tenant_shard_id, client), + &self.config.pageserver_jwt_token, + &self.config.ssl_ca_cert, + warn_threshold, + max_retries, + timeout, + cancel, + ) + .await; + (idx, r) }); } - while let Some(r) = futs.next().await { - let r = r.unwrap_or(Err(mgmt_api::Error::Cancelled)); - results.push(r); + while let Some((idx, r)) = futs.next().await { + results.push((idx, r.unwrap_or(Err(mgmt_api::Error::Cancelled)))); } - results + results.sort_by_key(|(idx, _)| *idx); + results.into_iter().map(|(_, r)| r).collect() } /// Helper for safely working with the shards in a tenant remotely on pageservers, for example From f68be2b5e26321a506cc5afd14b826606031a89c Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 14 Mar 2025 15:41:22 +0400 Subject: [PATCH 192/207] safekeeper: https for management API (#11171) ## Problem Storage controller uses unencrypted HTTP requests for safekeeper management API. - Closes: https://github.com/neondatabase/cloud/issues/24836 ## Summary of changes - Replace `hyper0::server::Server` with `http_utils::server::Server` in safekeeper. - Add HTTPS handler for safekeeper management API. 
--- Cargo.lock | 6 +- control_plane/src/local_env.rs | 11 +++- control_plane/src/safekeeper.rs | 22 ++++++++ control_plane/src/storage_controller.rs | 4 ++ libs/http-utils/Cargo.toml | 2 + libs/http-utils/src/lib.rs | 1 + libs/http-utils/src/tls_certs.rs | 21 +++++++ pageserver/Cargo.toml | 2 - pageserver/src/bin/pageserver.rs | 24 +------- safekeeper/Cargo.toml | 6 +- safekeeper/src/bin/safekeeper.rs | 55 ++++++++++++++++++- safekeeper/src/http/mod.rs | 38 +++++++++++-- safekeeper/src/http/routes.rs | 11 +++- safekeeper/src/lib.rs | 12 ++++ safekeeper/src/pull_timeline.rs | 23 +++++--- .../tests/walproposer_sim/safekeeper.rs | 4 ++ test_runner/fixtures/neon_fixtures.py | 14 ++++- test_runner/regress/test_ssl.py | 55 ++++++++++++++++++- test_runner/regress/test_wal_acceptor.py | 1 + 19 files changed, 264 insertions(+), 48 deletions(-) create mode 100644 libs/http-utils/src/tls_certs.rs diff --git a/Cargo.lock b/Cargo.lock index 898ff1eabb..12fa3589f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2863,6 +2863,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bytes", + "camino", "fail", "futures", "hyper 0.14.30", @@ -2873,6 +2874,7 @@ dependencies = [ "pprof", "regex", "routerify", + "rustls-pemfile 2.1.1", "serde", "serde_json", "serde_path_to_error", @@ -4327,8 +4329,6 @@ dependencies = [ "reqwest", "rpds", "rustls 0.23.18", - "rustls-pemfile 2.1.1", - "rustls-pki-types", "scopeguard", "send-future", "serde", @@ -6044,6 +6044,7 @@ dependencies = [ "regex", "remote_storage", "reqwest", + "rustls 0.23.18", "safekeeper_api", "safekeeper_client", "scopeguard", @@ -6060,6 +6061,7 @@ dependencies = [ "tokio", "tokio-io-timeout", "tokio-postgres", + "tokio-rustls 0.26.0", "tokio-stream", "tokio-tar", "tokio-util", diff --git a/control_plane/src/local_env.rs b/control_plane/src/local_env.rs index 2e57236ddb..f0a11106bd 100644 --- a/control_plane/src/local_env.rs +++ b/control_plane/src/local_env.rs @@ -149,7 +149,7 @@ pub struct NeonBroker { pub listen_addr: SocketAddr, } 
-/// Broker config for cluster internal communication. +/// A part of storage controller's config the neon_local knows about. #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] #[serde(default)] pub struct NeonStorageControllerConf { @@ -176,10 +176,11 @@ pub struct NeonStorageControllerConf { #[serde(with = "humantime_serde")] pub long_reconcile_threshold: Option, - #[serde(default)] pub use_https_pageserver_api: bool, pub timelines_onto_safekeepers: bool, + + pub use_https_safekeeper_api: bool, } impl NeonStorageControllerConf { @@ -205,6 +206,7 @@ impl Default for NeonStorageControllerConf { long_reconcile_threshold: None, use_https_pageserver_api: false, timelines_onto_safekeepers: false, + use_https_safekeeper_api: false, } } } @@ -302,6 +304,7 @@ pub struct SafekeeperConf { pub pg_port: u16, pub pg_tenant_only_port: Option, pub http_port: u16, + pub https_port: Option, pub sync: bool, pub remote_storage: Option, pub backup_threads: Option, @@ -316,6 +319,7 @@ impl Default for SafekeeperConf { pg_port: 0, pg_tenant_only_port: None, http_port: 0, + https_port: None, sync: true, remote_storage: None, backup_threads: None, @@ -845,6 +849,9 @@ impl LocalEnv { // create safekeeper dirs for safekeeper in &env.safekeepers { fs::create_dir_all(SafekeeperNode::datadir_path_by_id(&env, safekeeper.id))?; + SafekeeperNode::from_env(&env, safekeeper) + .initialize() + .context("safekeeper init failed")?; } // initialize pageserver state diff --git a/control_plane/src/safekeeper.rs b/control_plane/src/safekeeper.rs index 70915d5aaf..231871852e 100644 --- a/control_plane/src/safekeeper.rs +++ b/control_plane/src/safekeeper.rs @@ -111,6 +111,18 @@ impl SafekeeperNode { .expect("non-Unicode path") } + /// Initializes a safekeeper node by creating all necessary files, + /// e.g. SSL certificates. 
+ pub fn initialize(&self) -> anyhow::Result<()> { + if self.env.generate_local_ssl_certs { + self.env.generate_ssl_cert( + &self.datadir_path().join("server.crt"), + &self.datadir_path().join("server.key"), + )?; + } + Ok(()) + } + pub async fn start( &self, extra_opts: &[String], @@ -196,6 +208,16 @@ impl SafekeeperNode { ]); } + if let Some(https_port) = self.conf.https_port { + args.extend([ + "--listen-https".to_owned(), + format!("{}:{}", self.listen_addr, https_port), + ]); + } + if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() { + args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap())); + } + args.extend_from_slice(extra_opts); background_process::start_process( diff --git a/control_plane/src/storage_controller.rs b/control_plane/src/storage_controller.rs index e28fd70fdf..0c78f2e18e 100644 --- a/control_plane/src/storage_controller.rs +++ b/control_plane/src/storage_controller.rs @@ -538,6 +538,10 @@ impl StorageController { args.push("--use-https-pageserver-api".to_string()); } + if self.config.use_https_safekeeper_api { + args.push("--use-https-safekeeper-api".to_string()); + } + if let Some(ssl_ca_file) = self.env.ssl_ca_cert_path() { args.push(format!("--ssl-ca-file={}", ssl_ca_file.to_str().unwrap())); } diff --git a/libs/http-utils/Cargo.toml b/libs/http-utils/Cargo.toml index 00b3777a63..331ae4a9b8 100644 --- a/libs/http-utils/Cargo.toml +++ b/libs/http-utils/Cargo.toml @@ -7,6 +7,7 @@ license.workspace = true [dependencies] anyhow.workspace = true bytes.workspace = true +camino.workspace = true fail.workspace = true futures.workspace = true hyper0.workspace = true @@ -16,6 +17,7 @@ once_cell.workspace = true pprof.workspace = true regex.workspace = true routerify.workspace = true +rustls-pemfile.workspace = true serde.workspace = true serde_json.workspace = true serde_path_to_error.workspace = true diff --git a/libs/http-utils/src/lib.rs b/libs/http-utils/src/lib.rs index dd520ef69b..2bd0fe582f 100644 --- 
a/libs/http-utils/src/lib.rs +++ b/libs/http-utils/src/lib.rs @@ -4,6 +4,7 @@ pub mod failpoints; pub mod json; pub mod request; pub mod server; +pub mod tls_certs; extern crate hyper0 as hyper; diff --git a/libs/http-utils/src/tls_certs.rs b/libs/http-utils/src/tls_certs.rs new file mode 100644 index 0000000000..db9ec825ed --- /dev/null +++ b/libs/http-utils/src/tls_certs.rs @@ -0,0 +1,21 @@ +use camino::Utf8Path; +use tokio_rustls::rustls::pki_types::{CertificateDer, PrivateKeyDer}; + +pub fn load_cert_chain(filename: &Utf8Path) -> anyhow::Result>> { + let file = std::fs::File::open(filename)?; + let mut reader = std::io::BufReader::new(file); + + Ok(rustls_pemfile::certs(&mut reader).collect::, _>>()?) +} + +pub fn load_private_key(filename: &Utf8Path) -> anyhow::Result> { + let file = std::fs::File::open(filename)?; + let mut reader = std::io::BufReader::new(file); + + let key = rustls_pemfile::private_key(&mut reader)?; + + key.ok_or(anyhow::anyhow!( + "no private key found in {}", + filename.as_str(), + )) +} diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index d17a19ce65..56d97bf8a9 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -48,8 +48,6 @@ pprof.workspace = true rand.workspace = true range-set-blaze = { version = "0.1.16", features = ["alloc"] } regex.workspace = true -rustls-pemfile.workspace = true -rustls-pki-types.workspace = true rustls.workspace = true scopeguard.workspace = true send-future.workspace = true diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 4d30a6358b..3ab6d79546 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -30,7 +30,6 @@ use pageserver::{ }; use postgres_backend::AuthType; use remote_storage::GenericRemoteStorage; -use rustls_pki_types::{CertificateDer, PrivateKeyDer}; use tokio::signal::unix::SignalKind; use tokio::time::Instant; use tokio_util::sync::CancellationToken; @@ -622,8 +621,8 @@ fn start_pageserver( let 
https_task = match https_listener { Some(https_listener) => { - let certs = load_certs(&conf.ssl_cert_file)?; - let key = load_private_key(&conf.ssl_key_file)?; + let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?; + let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?; let server_config = rustls::ServerConfig::builder() .with_no_client_auth() @@ -735,25 +734,6 @@ fn start_pageserver( }) } -fn load_certs(filename: &Utf8Path) -> std::io::Result>> { - let file = std::fs::File::open(filename)?; - let mut reader = std::io::BufReader::new(file); - - rustls_pemfile::certs(&mut reader).collect() -} - -fn load_private_key(filename: &Utf8Path) -> anyhow::Result> { - let file = std::fs::File::open(filename)?; - let mut reader = std::io::BufReader::new(file); - - let key = rustls_pemfile::private_key(&mut reader)?; - - key.ok_or(anyhow::anyhow!( - "no private key found in {}", - filename.as_str(), - )) -} - async fn create_remote_storage_client( conf: &'static PageServerConf, ) -> anyhow::Result { diff --git a/safekeeper/Cargo.toml b/safekeeper/Cargo.toml index bb937ad56a..965aa7504b 100644 --- a/safekeeper/Cargo.toml +++ b/safekeeper/Cargo.toml @@ -35,8 +35,9 @@ postgres-protocol.workspace = true pprof.workspace = true rand.workspace = true regex.workspace = true -scopeguard.workspace = true reqwest = { workspace = true, features = ["json"] } +rustls.workspace = true +scopeguard.workspace = true serde.workspace = true serde_json.workspace = true smallvec.workspace = true @@ -45,10 +46,11 @@ strum_macros.workspace = true thiserror.workspace = true tikv-jemallocator.workspace = true tokio = { workspace = true, features = ["fs"] } -tokio-util = { workspace = true } tokio-io-timeout.workspace = true tokio-postgres.workspace = true +tokio-rustls.workspace = true tokio-tar.workspace = true +tokio-util = { workspace = true } tracing.workspace = true url.workspace = true metrics.workspace = true diff --git a/safekeeper/src/bin/safekeeper.rs 
b/safekeeper/src/bin/safekeeper.rs index 10fc4a4b59..9ca79de179 100644 --- a/safekeeper/src/bin/safekeeper.rs +++ b/safekeeper/src/bin/safekeeper.rs @@ -16,10 +16,12 @@ use futures::stream::FuturesUnordered; use futures::{FutureExt, StreamExt}; use metrics::set_build_info_metric; use remote_storage::RemoteStorageConfig; +use reqwest::Certificate; use safekeeper::defaults::{ DEFAULT_CONTROL_FILE_SAVE_INTERVAL, DEFAULT_EVICTION_MIN_RESIDENT, DEFAULT_HEARTBEAT_TIMEOUT, DEFAULT_HTTP_LISTEN_ADDR, DEFAULT_MAX_OFFLOADER_LAG_BYTES, DEFAULT_PARTIAL_BACKUP_CONCURRENCY, - DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, + DEFAULT_PARTIAL_BACKUP_TIMEOUT, DEFAULT_PG_LISTEN_ADDR, DEFAULT_SSL_CERT_FILE, + DEFAULT_SSL_KEY_FILE, }; use safekeeper::{ BROKER_RUNTIME, GlobalTimelines, HTTP_RUNTIME, SafeKeeperConf, WAL_SERVICE_RUNTIME, broker, @@ -94,6 +96,9 @@ struct Args { /// Listen http endpoint for management and metrics in the form host:port. #[arg(long, default_value = DEFAULT_HTTP_LISTEN_ADDR)] listen_http: String, + /// Listen https endpoint for management and metrics in the form host:port. + #[arg(long, default_value = None)] + listen_https: Option, /// Advertised endpoint for receiving/sending WAL in the form host:port. If not /// specified, listen_pg is used to advertise instead. #[arg(long, default_value = None)] @@ -203,6 +208,15 @@ struct Args { /// and the current position of the reader is smaller than this value. #[arg(long)] max_delta_for_fanout: Option, + /// Path to a file with certificate's private key for https API. + #[arg(long, default_value = DEFAULT_SSL_KEY_FILE)] + ssl_key_file: Utf8PathBuf, + /// Path to a file with a X509 certificate for https API. + #[arg(long, default_value = DEFAULT_SSL_CERT_FILE)] + ssl_cert_file: Utf8PathBuf, + /// Trusted root CA certificate to use in https APIs. + #[arg(long)] + ssl_ca_file: Option, } // Like PathBufValueParser, but allows empty string. 
@@ -336,12 +350,22 @@ async fn main() -> anyhow::Result<()> { } }; + let ssl_ca_cert = match args.ssl_ca_file.as_ref() { + Some(ssl_ca_file) => { + tracing::info!("Using ssl root CA file: {ssl_ca_file:?}"); + let buf = tokio::fs::read(ssl_ca_file).await?; + Some(Certificate::from_pem(&buf)?) + } + None => None, + }; + let conf = Arc::new(SafeKeeperConf { workdir, my_id: id, listen_pg_addr: args.listen_pg, listen_pg_addr_tenant_only: args.listen_pg_tenant_only, listen_http_addr: args.listen_http, + listen_https_addr: args.listen_https, advertise_pg_addr: args.advertise_pg, availability_zone: args.availability_zone, no_sync: args.no_sync, @@ -368,6 +392,9 @@ async fn main() -> anyhow::Result<()> { eviction_min_resident: args.eviction_min_resident, wal_reader_fanout: args.wal_reader_fanout, max_delta_for_fanout: args.max_delta_for_fanout, + ssl_key_file: args.ssl_key_file, + ssl_cert_file: args.ssl_cert_file, + ssl_ca_cert, }); // initialize sentry if SENTRY_DSN is provided @@ -428,6 +455,17 @@ async fn start_safekeeper(conf: Arc) -> Result<()> { e })?; + let https_listener = match conf.listen_https_addr.as_ref() { + Some(listen_https_addr) => { + info!("starting safekeeper HTTPS service on {}", listen_https_addr); + Some(tcp_listener::bind(listen_https_addr).map_err(|e| { + error!("failed to bind to address {}: {}", listen_https_addr, e); + e + })?) + } + None => None, + }; + let global_timelines = Arc::new(GlobalTimelines::new(conf.clone())); // Register metrics collector for active timelines. 
It's important to do this @@ -501,7 +539,7 @@ async fn start_safekeeper(conf: Arc) -> Result<()> { let http_handle = current_thread_rt .as_ref() .unwrap_or_else(|| HTTP_RUNTIME.handle()) - .spawn(http::task_main( + .spawn(http::task_main_http( conf.clone(), http_listener, global_timelines.clone(), @@ -509,6 +547,19 @@ async fn start_safekeeper(conf: Arc) -> Result<()> { .map(|res| ("HTTP service main".to_owned(), res)); tasks_handles.push(Box::pin(http_handle)); + if let Some(https_listener) = https_listener { + let https_handle = current_thread_rt + .as_ref() + .unwrap_or_else(|| HTTP_RUNTIME.handle()) + .spawn(http::task_main_https( + conf.clone(), + https_listener, + global_timelines.clone(), + )) + .map(|res| ("HTTPS service main".to_owned(), res)); + tasks_handles.push(Box::pin(https_handle)); + } + let broker_task_handle = current_thread_rt .as_ref() .unwrap_or_else(|| BROKER_RUNTIME.handle()) diff --git a/safekeeper/src/http/mod.rs b/safekeeper/src/http/mod.rs index f162985ef7..4908863a4b 100644 --- a/safekeeper/src/http/mod.rs +++ b/safekeeper/src/http/mod.rs @@ -3,10 +3,11 @@ use std::sync::Arc; pub use routes::make_router; pub use safekeeper_api::models; +use tokio_util::sync::CancellationToken; use crate::{GlobalTimelines, SafeKeeperConf}; -pub async fn task_main( +pub async fn task_main_http( conf: Arc, http_listener: std::net::TcpListener, global_timelines: Arc, @@ -14,8 +15,37 @@ pub async fn task_main( let router = make_router(conf, global_timelines) .build() .map_err(|err| anyhow::anyhow!(err))?; - let service = http_utils::RouterService::new(router).unwrap(); - let server = hyper::Server::from_tcp(http_listener)?; - server.serve(service).await?; + + let service = Arc::new( + http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?, + ); + let server = http_utils::server::Server::new(service, http_listener, None)?; + server.serve(CancellationToken::new()).await?; + Ok(()) // unreachable +} + +pub async fn task_main_https( 
+ conf: Arc, + https_listener: std::net::TcpListener, + global_timelines: Arc, +) -> anyhow::Result<()> { + let certs = http_utils::tls_certs::load_cert_chain(&conf.ssl_cert_file)?; + let key = http_utils::tls_certs::load_private_key(&conf.ssl_key_file)?; + + let server_config = rustls::ServerConfig::builder() + .with_no_client_auth() + .with_single_cert(certs, key)?; + + let tls_acceptor = tokio_rustls::TlsAcceptor::from(Arc::new(server_config)); + + let router = make_router(conf, global_timelines) + .build() + .map_err(|err| anyhow::anyhow!(err))?; + + let service = Arc::new( + http_utils::RequestServiceBuilder::new(router).map_err(|err| anyhow::anyhow!(err))?, + ); + let server = http_utils::server::Server::new(service, https_listener, Some(tls_acceptor))?; + server.serve(CancellationToken::new()).await?; Ok(()) // unreachable } diff --git a/safekeeper/src/http/routes.rs b/safekeeper/src/http/routes.rs index 21293671e1..3299d77545 100644 --- a/safekeeper/src/http/routes.rs +++ b/safekeeper/src/http/routes.rs @@ -232,9 +232,14 @@ async fn timeline_pull_handler(mut request: Request) -> Result, pub listen_http_addr: String, + pub listen_https_addr: Option, pub advertise_pg_addr: Option, pub availability_zone: Option, pub no_sync: bool, @@ -111,6 +116,9 @@ pub struct SafeKeeperConf { pub eviction_min_resident: Duration, pub wal_reader_fanout: bool, pub max_delta_for_fanout: Option, + pub ssl_key_file: Utf8PathBuf, + pub ssl_cert_file: Utf8PathBuf, + pub ssl_ca_cert: Option, } impl SafeKeeperConf { @@ -127,6 +135,7 @@ impl SafeKeeperConf { listen_pg_addr: defaults::DEFAULT_PG_LISTEN_ADDR.to_string(), listen_pg_addr_tenant_only: None, listen_http_addr: defaults::DEFAULT_HTTP_LISTEN_ADDR.to_string(), + listen_https_addr: None, advertise_pg_addr: None, availability_zone: None, remote_storage: None, @@ -155,6 +164,9 @@ impl SafeKeeperConf { eviction_min_resident: Duration::ZERO, wal_reader_fanout: false, max_delta_for_fanout: None, + ssl_key_file: 
Utf8PathBuf::from(defaults::DEFAULT_SSL_KEY_FILE), + ssl_cert_file: Utf8PathBuf::from(defaults::DEFAULT_SSL_CERT_FILE), + ssl_ca_cert: None, } } } diff --git a/safekeeper/src/pull_timeline.rs b/safekeeper/src/pull_timeline.rs index 7d6ce1269c..dab8142dfb 100644 --- a/safekeeper/src/pull_timeline.rs +++ b/safekeeper/src/pull_timeline.rs @@ -8,6 +8,7 @@ use camino::Utf8PathBuf; use chrono::{DateTime, Utc}; use futures::{SinkExt, StreamExt, TryStreamExt}; use postgres_ffi::{PG_TLI, XLogFileName, XLogSegNo}; +use reqwest::Certificate; use safekeeper_api::Term; use safekeeper_api::models::{PullTimelineRequest, PullTimelineResponse, TimelineStatus}; use safekeeper_client::mgmt_api; @@ -392,6 +393,7 @@ pub struct DebugDumpResponse { pub async fn handle_request( request: PullTimelineRequest, sk_auth_token: Option, + ssl_ca_cert: Option, global_timelines: Arc, ) -> Result { let existing_tli = global_timelines.get(TenantTimelineId::new( @@ -402,9 +404,11 @@ pub async fn handle_request( bail!("Timeline {} already exists", request.timeline_id); } - // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's - // part of https support (#24836). 
- let http_client = reqwest::Client::new(); + let mut http_client = reqwest::Client::builder(); + if let Some(ssl_ca_cert) = ssl_ca_cert { + http_client = http_client.add_root_certificate(ssl_ca_cert); + } + let http_client = http_client.build()?; let http_hosts = request.http_hosts.clone(); @@ -441,13 +445,21 @@ pub async fn handle_request( assert!(status.tenant_id == request.tenant_id); assert!(status.timeline_id == request.timeline_id); - pull_timeline(status, safekeeper_host, sk_auth_token, global_timelines).await + pull_timeline( + status, + safekeeper_host, + sk_auth_token, + http_client, + global_timelines, + ) + .await } async fn pull_timeline( status: TimelineStatus, host: String, sk_auth_token: Option, + http_client: reqwest::Client, global_timelines: Arc, ) -> Result { let ttid = TenantTimelineId::new(status.tenant_id, status.timeline_id); @@ -464,9 +476,6 @@ async fn pull_timeline( let conf = &global_timelines.get_global_config(); let (_tmp_dir, tli_dir_path) = create_temp_timeline_dir(conf, ttid).await?; - // TODO(DimasKovas): add ssl root CA certificate when implementing safekeeper's - // part of https support (#24836). - let http_client = reqwest::Client::new(); let client = Client::new(http_client, host.clone(), sk_auth_token.clone()); // Request stream with basebackup archive. 
let bb_resp = client diff --git a/safekeeper/tests/walproposer_sim/safekeeper.rs b/safekeeper/tests/walproposer_sim/safekeeper.rs index 6ce1a9940e..0dfdafcc51 100644 --- a/safekeeper/tests/walproposer_sim/safekeeper.rs +++ b/safekeeper/tests/walproposer_sim/safekeeper.rs @@ -152,6 +152,7 @@ pub fn run_server(os: NodeOs, disk: Arc) -> Result<()> { my_id: NodeId(os.id() as u64), listen_pg_addr: String::new(), listen_http_addr: String::new(), + listen_https_addr: None, no_sync: false, broker_endpoint: "/".parse::().unwrap(), broker_keepalive_interval: Duration::from_secs(0), @@ -179,6 +180,9 @@ pub fn run_server(os: NodeOs, disk: Arc) -> Result<()> { eviction_min_resident: Duration::ZERO, wal_reader_fanout: false, max_delta_for_fanout: None, + ssl_key_file: Utf8PathBuf::from(""), + ssl_cert_file: Utf8PathBuf::from(""), + ssl_ca_cert: None, }; let mut global = GlobalMap::new(disk, conf.clone())?; diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 11ca1d7913..deff02f0f9 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -466,6 +466,9 @@ class NeonEnvBuilder: # Flag to enable https listener in pageserver, generate local ssl certs, # and force storage controller to use https for pageserver api. self.use_https_pageserver_api: bool = False + # Flag to enable https listener in safekeeper, generate local ssl certs, + # and force storage controller to use https for safekeeper api. 
+ self.use_https_safekeeper_api: bool = False self.pageserver_virtual_file_io_engine: str | None = pageserver_virtual_file_io_engine self.pageserver_get_vectored_concurrent_io: str | None = ( @@ -1063,7 +1066,9 @@ class NeonEnv: self.initial_tenant = config.initial_tenant self.initial_timeline = config.initial_timeline - self.generate_local_ssl_certs = config.use_https_pageserver_api + self.generate_local_ssl_certs = ( + config.use_https_pageserver_api or config.use_https_safekeeper_api + ) self.ssl_ca_file = ( self.repo_dir.joinpath("rootCA.crt") if self.generate_local_ssl_certs else None ) @@ -1146,6 +1151,10 @@ class NeonEnv: storage_controller_config = storage_controller_config or {} storage_controller_config["use_https_pageserver_api"] = True + if config.use_https_safekeeper_api: + storage_controller_config = storage_controller_config or {} + storage_controller_config["use_https_safekeeper_api"] = True + if storage_controller_config is not None: cfg["storage_controller"] = storage_controller_config @@ -1248,6 +1257,7 @@ class NeonEnv: pg=self.port_distributor.get_port(), pg_tenant_only=self.port_distributor.get_port(), http=self.port_distributor.get_port(), + https=self.port_distributor.get_port() if config.use_https_safekeeper_api else None, ) id = config.safekeepers_id_start + i # assign ids sequentially sk_cfg: dict[str, Any] = { @@ -1255,6 +1265,7 @@ class NeonEnv: "pg_port": port.pg, "pg_tenant_only_port": port.pg_tenant_only, "http_port": port.http, + "https_port": port.https, "sync": config.safekeepers_enable_fsync, } if config.auth_enabled: @@ -4475,6 +4486,7 @@ class SafekeeperPort: pg: int pg_tenant_only: int http: int + https: int | None @dataclass diff --git a/test_runner/regress/test_ssl.py b/test_runner/regress/test_ssl.py index 25d839aa42..7db4a16f49 100644 --- a/test_runner/regress/test_ssl.py +++ b/test_runner/regress/test_ssl.py @@ -1,5 +1,7 @@ +import pytest import requests -from fixtures.neon_fixtures import NeonEnvBuilder +from 
fixtures.neon_fixtures import NeonEnvBuilder, StorageControllerApiException +from fixtures.utils import wait_until def test_pageserver_https_api(neon_env_builder: NeonEnvBuilder): @@ -13,3 +15,54 @@ def test_pageserver_https_api(neon_env_builder: NeonEnvBuilder): addr = f"https://localhost:{env.pageserver.service_port.https}/v1/status" requests.get(addr, verify=str(env.ssl_ca_file)).raise_for_status() + + +def test_safekeeper_https_api(neon_env_builder: NeonEnvBuilder): + """ + Test HTTPS safekeeper management API. + 1. Make /v1/status request to HTTPS API to ensure it's appropriately configured. + 2. Try to register safekeeper in storcon with https port missing. + 3. Register safekeeper with https port. + 4. Wait for a heartbeat round to complete. + """ + neon_env_builder.use_https_safekeeper_api = True + env = neon_env_builder.init_start() + + sk = env.safekeepers[0] + + # 1. Make simple https request. + addr = f"https://localhost:{sk.port.https}/v1/status" + requests.get(addr, verify=str(env.ssl_ca_file)).raise_for_status() + + # Note: http_port is intentionally wrong. + # Storcon should not use it if use_https is on. + http_port = 0 + + body = { + "active": True, + "id": sk.id, + "created_at": "2023-10-25T09:11:25Z", + "updated_at": "2024-08-28T11:32:43Z", + "region_id": "aws-us-east-2", + "host": "localhost", + "port": sk.port.pg, + "http_port": http_port, + "https_port": None, + "version": 5957, + "availability_zone_id": "us-east-2b", + } + # 2. Try register with https port missing. + with pytest.raises(StorageControllerApiException, match="https port is not specified"): + env.storage_controller.on_safekeeper_deploy(sk.id, body) + + # 3. Register with https port. + body["https_port"] = sk.port.https + env.storage_controller.on_safekeeper_deploy(sk.id, body) + + # 4. Wait for a heartbeat round to complete.
+ def storcon_heartbeat(): + assert env.storage_controller.log_contains( + "Heartbeat round complete for 1 safekeepers, 0 offline" + ) + + wait_until(storcon_heartbeat) diff --git a/test_runner/regress/test_wal_acceptor.py b/test_runner/regress/test_wal_acceptor.py index 0366e88389..55e38b29a2 100644 --- a/test_runner/regress/test_wal_acceptor.py +++ b/test_runner/regress/test_wal_acceptor.py @@ -1427,6 +1427,7 @@ class SafekeeperEnv: pg=self.port_distributor.get_port(), pg_tenant_only=self.port_distributor.get_port(), http=self.port_distributor.get_port(), + https=None, ) safekeeper_dir = self.repo_dir / f"sk{i}" From b0922967e03732bf8e22a8d2273da43dbae94a01 Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 14 Mar 2025 15:51:11 +0400 Subject: [PATCH 193/207] Bump humantime version and remove advisories.ignore (#11242) ## Problem - Closes: https://github.com/neondatabase/neon/issues/11179#issuecomment-2724222041 ## Summary of changes - Bump humantime version to `2.2` - Remove `RUSTSEC-2025-0014` from `advisories.ignore` --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- deny.toml | 4 ---- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12fa3589f6..39ce785a4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2904,9 +2904,9 @@ checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" [[package]] name = "humantime" -version = "2.1.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" [[package]] name = "humantime-serde" diff --git a/Cargo.toml b/Cargo.toml index 82fb463182..f2a94d2371 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -106,7 +106,7 @@ hostname = "0.4" http = {version = "1.1.0", features = ["std"]} http-types = { version = "2", default-features = false } 
http-body-util = "0.1.2" -humantime = "2.1" +humantime = "2.2" humantime-serde = "1.1.1" hyper0 = { package = "hyper", version = "0.14" } hyper = "1.4" diff --git a/deny.toml b/deny.toml index 1023b1833a..ed7aa9ef9f 100644 --- a/deny.toml +++ b/deny.toml @@ -31,10 +31,6 @@ reason = "the marvin attack only affects private key decryption, not public key id = "RUSTSEC-2024-0436" reason = "The paste crate is a build-only dependency with no runtime components. It is unlikely to have any security impact." -[[advisories.ignore]] -id = "RUSTSEC-2025-0014" -reason = "The humantime is widely used and is not easy to replace right now. It is unmaintained, but it has no known vulnerabilities to care about. #11179" - # This section is considered when running `cargo deny check licenses` # More documentation for the licenses section can be found here: # https://embarkstudios.github.io/cargo-deny/checks/licenses/cfg.html From 7fe5a689b4dd501a084181ccad03e0bbc3c0f6f2 Mon Sep 17 00:00:00 2001 From: Conrad Ludgate Date: Fri, 14 Mar 2025 13:54:57 +0000 Subject: [PATCH 194/207] feat(proxy): export ingress metrics (#11244) ## Problem We exposed the direction tag in #10925 but didn't actually include the ingress tag in the export to allow for an adaption period. 
## Summary of changes We now export the ingress direction --- proxy/src/proxy/passthrough.rs | 4 +- proxy/src/serverless/conn_pool_lib.rs | 9 +- proxy/src/serverless/http_conn_pool.rs | 9 +- proxy/src/serverless/sql_over_http.rs | 17 ++- proxy/src/usage_metrics.rs | 148 ++++++++++++++++++------- 5 files changed, 126 insertions(+), 61 deletions(-) diff --git a/proxy/src/proxy/passthrough.rs b/proxy/src/proxy/passthrough.rs index 23b9897155..c100b8d716 100644 --- a/proxy/src/proxy/passthrough.rs +++ b/proxy/src/proxy/passthrough.rs @@ -10,7 +10,7 @@ use crate::config::ComputeConfig; use crate::control_plane::messages::MetricsAuxInfo; use crate::metrics::{Direction, Metrics, NumClientConnectionsGuard, NumConnectionRequestsGuard}; use crate::stream::Stream; -use crate::usage_metrics::{Ids, MetricCounterRecorder, TrafficDirection, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounterRecorder, USAGE_METRICS}; /// Forward bytes in both directions (client <-> compute). #[tracing::instrument(skip_all)] @@ -24,7 +24,6 @@ pub(crate) async fn proxy_pass( let usage_tx = USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, - direction: TrafficDirection::Egress, private_link_id, }); @@ -47,6 +46,7 @@ pub(crate) async fn proxy_pass( |cnt| { // Number of bytes the client sent to the compute node (inbound). 
metrics.get_metric(m_recv).inc_by(cnt as u64); + usage_tx.record_ingress(cnt as u64); }, ); diff --git a/proxy/src/serverless/conn_pool_lib.rs b/proxy/src/serverless/conn_pool_lib.rs index 933204994b..77b548cc43 100644 --- a/proxy/src/serverless/conn_pool_lib.rs +++ b/proxy/src/serverless/conn_pool_lib.rs @@ -22,7 +22,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::protocol2::ConnectionInfoExtra; use crate::types::{DbName, EndpointCacheKey, RoleName}; -use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; #[derive(Debug, Clone)] pub(crate) struct ConnInfo { @@ -639,11 +639,7 @@ impl Client { (&mut inner.inner, Discard { conn_info, pool }) } - pub(crate) fn metrics( - &self, - direction: TrafficDirection, - ctx: &RequestContext, - ) -> Arc { + pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc { let aux = &self .inner .as_ref() @@ -659,7 +655,6 @@ impl Client { USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, - direction, private_link_id, }) } diff --git a/proxy/src/serverless/http_conn_pool.rs b/proxy/src/serverless/http_conn_pool.rs index bca2d4c165..1c6574e57e 100644 --- a/proxy/src/serverless/http_conn_pool.rs +++ b/proxy/src/serverless/http_conn_pool.rs @@ -19,7 +19,7 @@ use crate::control_plane::messages::{ColdStartInfo, MetricsAuxInfo}; use crate::metrics::{HttpEndpointPoolsGuard, Metrics}; use crate::protocol2::ConnectionInfoExtra; use crate::types::EndpointCacheKey; -use crate::usage_metrics::{Ids, MetricCounter, TrafficDirection, USAGE_METRICS}; +use crate::usage_metrics::{Ids, MetricCounter, USAGE_METRICS}; pub(crate) type Send = http2::SendRequest; pub(crate) type Connect = http2::Connection, hyper::body::Incoming, TokioExecutor>; @@ -265,11 +265,7 @@ impl Client { Self { inner } } - pub(crate) fn metrics( - &self, - direction: 
TrafficDirection, - ctx: &RequestContext, - ) -> Arc { + pub(crate) fn metrics(&self, ctx: &RequestContext) -> Arc { let aux = &self.inner.aux; let private_link_id = match ctx.extra() { @@ -281,7 +277,6 @@ impl Client { USAGE_METRICS.register(Ids { endpoint_id: aux.endpoint_id, branch_id: aux.branch_id, - direction, private_link_id, }) } diff --git a/proxy/src/serverless/sql_over_http.rs b/proxy/src/serverless/sql_over_http.rs index a79a478126..10e378a18d 100644 --- a/proxy/src/serverless/sql_over_http.rs +++ b/proxy/src/serverless/sql_over_http.rs @@ -42,7 +42,7 @@ use crate::metrics::{HttpDirection, Metrics}; use crate::proxy::{NeonOptions, run_until_cancelled}; use crate::serverless::backend::HttpConnError; use crate::types::{DbName, RoleName}; -use crate::usage_metrics::{MetricCounter, MetricCounterRecorder, TrafficDirection}; +use crate::usage_metrics::{MetricCounter, MetricCounterRecorder}; #[derive(serde::Deserialize)] #[serde(rename_all = "camelCase")] @@ -663,6 +663,7 @@ async fn handle_db_inner( let parsed_headers = HttpHeaders::try_parse(headers)?; + let mut request_len = 0; let fetch_and_process_request = Box::pin( async { let body = read_body_with_limit( @@ -671,6 +672,8 @@ async fn handle_db_inner( ) .await?; + request_len = body.len(); + Metrics::get() .proxy .http_conn_content_length_bytes @@ -765,7 +768,7 @@ async fn handle_db_inner( } }; - let metrics = client.metrics(TrafficDirection::Egress, ctx); + let metrics = client.metrics(ctx); let len = json_output.len(); let response = response @@ -781,6 +784,8 @@ async fn handle_db_inner( // count the egress bytes - we miss the TLS and header overhead but oh well... 
// moving this later in the stack is going to be a lot of effort and ehhhh metrics.record_egress(len as u64); + metrics.record_ingress(request_len as u64); + Metrics::get() .proxy .http_conn_content_length_bytes @@ -838,7 +843,7 @@ async fn handle_auth_broker_inner( .expect("all headers and params received via hyper should be valid for request"); // todo: map body to count egress - let _metrics = client.metrics(TrafficDirection::Egress, ctx); + let _metrics = client.metrics(ctx); Ok(client .inner @@ -1168,10 +1173,10 @@ enum Discard<'a> { } impl Client { - fn metrics(&self, direction: TrafficDirection, ctx: &RequestContext) -> Arc { + fn metrics(&self, ctx: &RequestContext) -> Arc { match self { - Client::Remote(client) => client.metrics(direction, ctx), - Client::Local(local_client) => local_client.metrics(direction, ctx), + Client::Remote(client) => client.metrics(ctx), + Client::Local(local_client) => local_client.metrics(ctx), } } diff --git a/proxy/src/usage_metrics.rs b/proxy/src/usage_metrics.rs index 004d268fa1..2b27dc5c76 100644 --- a/proxy/src/usage_metrics.rs +++ b/proxy/src/usage_metrics.rs @@ -44,11 +44,17 @@ const HTTP_REPORTING_RETRY_DURATION: Duration = Duration::from_secs(60); pub(crate) struct Ids { pub(crate) endpoint_id: EndpointIdInt, pub(crate) branch_id: BranchIdInt, - pub(crate) direction: TrafficDirection, #[serde(with = "none_as_empty_string")] pub(crate) private_link_id: Option, } +#[derive(Eq, Hash, PartialEq, Serialize, Deserialize, Debug, Clone)] +struct Extra { + #[serde(flatten)] + ids: Ids, + direction: TrafficDirection, +} + mod none_as_empty_string { use serde::Deserialize; use smol_str::SmolStr; @@ -76,18 +82,23 @@ pub(crate) enum TrafficDirection { pub(crate) trait MetricCounterRecorder { /// Record that some bytes were sent from the proxy to the client fn record_egress(&self, bytes: u64); + + /// Record that some bytes were sent from the client to the proxy + fn record_ingress(&self, bytes: u64); + /// Record that some 
connections were opened fn record_connection(&self, count: usize); } trait MetricCounterReporter { - fn get_metrics(&mut self) -> (u64, usize); - fn move_metrics(&self) -> (u64, usize); + fn get_metrics(&mut self) -> MetricsData; + fn move_metrics(&self) -> MetricsData; } #[derive(Debug)] pub(crate) struct MetricCounter { transmitted: AtomicU64, + received: AtomicU64, opened_connections: AtomicUsize, } @@ -97,6 +108,11 @@ impl MetricCounterRecorder for MetricCounter { self.transmitted.fetch_add(bytes, Ordering::Relaxed); } + /// Record that some bytes were sent from the client to the proxy + fn record_ingress(&self, bytes: u64) { + self.received.fetch_add(bytes, Ordering::Relaxed); + } + /// Record that some connections were opened fn record_connection(&self, count: usize) { self.opened_connections.fetch_add(count, Ordering::Relaxed); @@ -104,29 +120,43 @@ impl MetricCounterRecorder for MetricCounter { } impl MetricCounterReporter for MetricCounter { - fn get_metrics(&mut self) -> (u64, usize) { - ( - *self.transmitted.get_mut(), - *self.opened_connections.get_mut(), - ) + fn get_metrics(&mut self) -> MetricsData { + MetricsData { + received: *self.received.get_mut(), + transmitted: *self.transmitted.get_mut(), + connections: *self.opened_connections.get_mut(), + } } - fn move_metrics(&self) -> (u64, usize) { - ( - self.transmitted.swap(0, Ordering::Relaxed), - self.opened_connections.swap(0, Ordering::Relaxed), - ) + + fn move_metrics(&self) -> MetricsData { + MetricsData { + received: self.received.swap(0, Ordering::Relaxed), + transmitted: self.transmitted.swap(0, Ordering::Relaxed), + connections: self.opened_connections.swap(0, Ordering::Relaxed), + } } } +struct MetricsData { + transmitted: u64, + received: u64, + connections: usize, +} + +struct BytesSent { + transmitted: u64, + received: u64, +} + trait Clearable { /// extract the value that should be reported - fn should_report(self: &Arc) -> Option; + fn should_report(self: &Arc) -> Option; /// Determine
whether the counter should be cleared from the global map. fn should_clear(self: &mut Arc) -> bool; } impl Clearable for C { - fn should_report(self: &Arc) -> Option { + fn should_report(self: &Arc) -> Option { // heuristic to see if the branch is still open // if a clone happens while we are observing, the heuristic will be incorrect. // @@ -139,14 +169,21 @@ impl Clearable for C { // (to avoid sending the same metrics twice) // see the relevant discussion on why to do so even if the status is not success: // https://github.com/neondatabase/neon/pull/4563#discussion_r1246710956 - let (value, opened) = self.move_metrics(); + let MetricsData { + transmitted, + received, + connections, + } = self.move_metrics(); // Our only requirement is that we report in every interval if there was an open connection // if there were no opened connections since, then we don't need to report - if value == 0 && !is_open && opened == 0 { + if transmitted == 0 && received == 0 && !is_open && connections == 0 { None } else { - Some(value) + Some(BytesSent { + transmitted, + received, + }) } } fn should_clear(self: &mut Arc) -> bool { @@ -154,9 +191,13 @@ impl Clearable for C { let Some(counter) = Arc::get_mut(self) else { return false; }; - let (opened, value) = counter.get_metrics(); + let MetricsData { + transmitted, + received, + connections, + } = counter.get_metrics(); // clear if there's no data to report - value == 0 && opened == 0 + transmitted == 0 && received == 0 && connections == 0 } } @@ -178,6 +219,7 @@ impl Metrics { .entry(ids) .or_insert_with(|| { Arc::new(MetricCounter { + received: AtomicU64::new(0), transmitted: AtomicU64::new(0), opened_connections: AtomicUsize::new(0), }) @@ -242,10 +284,10 @@ pub async fn task_main(config: &MetricCollectionConfig) -> anyhow::Result( endpoints: &ClashMap, FastHasher>, -) -> Vec<(Ids, u64)> { +) -> Vec<(Ids, BytesSent)> { let mut metrics_to_clear = Vec::new(); - let metrics_to_send: Vec<(Ids, u64)> = endpoints + let metrics_to_send: 
Vec<(Ids, BytesSent)> = endpoints .iter() .filter_map(|counter| { let key = counter.key().clone(); @@ -271,26 +313,46 @@ fn collect_and_clear_metrics( } fn create_event_chunks<'a>( - metrics_to_send: &'a [(Ids, u64)], + metrics_to_send: &'a [(Ids, BytesSent)], hostname: &'a str, prev: DateTime, now: DateTime, chunk_size: usize, -) -> impl Iterator>> + 'a { +) -> impl Iterator>> + 'a { metrics_to_send .chunks(chunk_size) .map(move |chunk| EventChunk { events: chunk .iter() - .map(|(ids, value)| Event { - kind: EventType::Incremental { - start_time: prev, - stop_time: now, - }, - metric: PROXY_IO_BYTES_PER_CLIENT, - idempotency_key: idempotency_key(hostname), - value: *value, - extra: ids.clone(), + .flat_map(|(ids, bytes)| { + [ + Event { + kind: EventType::Incremental { + start_time: prev, + stop_time: now, + }, + metric: PROXY_IO_BYTES_PER_CLIENT, + idempotency_key: idempotency_key(hostname), + value: bytes.transmitted, + extra: Extra { + ids: ids.clone(), + direction: TrafficDirection::Egress, + }, + }, + Event { + kind: EventType::Incremental { + start_time: prev, + stop_time: now, + }, + metric: PROXY_IO_BYTES_PER_CLIENT, + idempotency_key: idempotency_key(hostname), + value: bytes.received, + extra: Extra { + ids: ids.clone(), + direction: TrafficDirection::Ingress, + }, + }, + ] }) .collect(), }) @@ -350,7 +412,7 @@ fn create_remote_path_prefix(now: DateTime) -> String { async fn upload_main_events_chunked( client: &http::ClientWithMiddleware, metric_collection_endpoint: &reqwest::Url, - chunk: &EventChunk<'_, Event>, + chunk: &EventChunk<'_, Event>, subchunk_size: usize, ) { // Split into smaller chunks to avoid exceeding the max request size @@ -384,7 +446,7 @@ async fn upload_main_events_chunked( async fn upload_backup_events( storage: Option<&GenericRemoteStorage>, - chunk: &EventChunk<'_, Event>, + chunk: &EventChunk<'_, Event>, path_prefix: &str, cancel: &CancellationToken, ) -> anyhow::Result<()> { @@ -461,7 +523,7 @@ mod tests { #[tokio::test] async 
fn metrics() { - type Report = EventChunk<'static, Event>; + type Report = EventChunk<'static, Event>; let reports: Arc>> = Arc::default(); let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); @@ -533,7 +595,6 @@ mod tests { let counter = metrics.register(Ids { endpoint_id: (&EndpointId::from("e1")).into(), branch_id: (&BranchId::from("b1")).into(), - direction: TrafficDirection::Egress, private_link_id: None, }); @@ -551,13 +612,19 @@ mod tests { .await; let r = std::mem::take(&mut *reports.lock().unwrap()); assert_eq!(r.len(), 1); - assert_eq!(r[0].events.len(), 1); + assert_eq!(r[0].events.len(), 2); assert_eq!(r[0].events[0].value, 0); + assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress); + assert_eq!(r[0].events[1].value, 0); + assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress); pushed_chunks.extend(r); // record egress counter.record_egress(1); + // record ingress + counter.record_ingress(2); + // egress should be observered collect_metrics_iteration( &metrics.endpoints, @@ -572,8 +639,11 @@ mod tests { .await; let r = std::mem::take(&mut *reports.lock().unwrap()); assert_eq!(r.len(), 1); - assert_eq!(r[0].events.len(), 1); + assert_eq!(r[0].events.len(), 2); assert_eq!(r[0].events[0].value, 1); + assert_eq!(r[0].events[0].extra.direction, TrafficDirection::Egress); + assert_eq!(r[0].events[1].value, 2); + assert_eq!(r[0].events[1].extra.direction, TrafficDirection::Ingress); pushed_chunks.extend(r); // release counter From b7c6738524208a892839a90c8afef953104f5d28 Mon Sep 17 00:00:00 2001 From: Anastasia Lubennikova Date: Fri, 14 Mar 2025 14:08:16 +0000 Subject: [PATCH 195/207] feat(compute_ctl): add pgaudt log gc to compute_ctl (#11169) - add pgaudt_gc thread to compute_ctl to cleanup old pgaudit logs if they exist. pgaudit can rotate files, but it doesn't delete the old files - Add AUDIT_LOG_DIR_SIZE metric to compute_ctl to track the size of the audit log directory in bytes. 
- Fix permissions for rsyslog state files directory --- compute/vm-image-spec-bookworm.yaml | 2 +- compute/vm-image-spec-bullseye.yaml | 2 +- compute_tools/src/compute.rs | 14 ++-- compute_tools/src/config.rs | 3 +- .../compute_audit_rsyslog_template.conf | 3 +- compute_tools/src/metrics.rs | 16 ++++- compute_tools/src/rsyslog.rs | 65 ++++++++++++++++++- 7 files changed, 89 insertions(+), 16 deletions(-) diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index e6707381ac..0cf72b6f74 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -145,7 +145,7 @@ merge: | COPY compute_rsyslog.conf /etc/compute_rsyslog.conf RUN chmod 0666 /etc/compute_rsyslog.conf - RUN chmod 0666 /var/log/ + RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index c89ee112dc..9deaf3ea55 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -140,7 +140,7 @@ merge: | COPY compute_rsyslog.conf /etc/compute_rsyslog.conf RUN chmod 0666 /etc/compute_rsyslog.conf - RUN chmod 0666 /var/log/ + RUN mkdir /var/log/rsyslog && chown -R postgres /var/log/rsyslog COPY --from=libcgroup-builder /libcgroup-install/bin/* /usr/bin/ diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index a0654ea0e4..58b99dde53 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -37,7 +37,7 @@ use crate::logger::startup_context_from_env; use crate::lsn_lease::launch_lsn_lease_bg_task_for_static; use crate::monitor::launch_monitor; use crate::pg_helpers::*; -use crate::rsyslog::configure_audit_rsyslog; +use crate::rsyslog::{configure_audit_rsyslog, launch_pgaudit_gc}; use crate::spec::*; use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; @@ -625,13 +625,11 @@ impl 
ComputeNode { } let log_directory_path = Path::new(&self.params.pgdata).join("log"); - // TODO: make this more robust - // now rsyslog starts once and there is no monitoring or restart if it fails - configure_audit_rsyslog( - log_directory_path.to_str().unwrap(), - "hipaa", - &remote_endpoint, - )?; + let log_directory_path = log_directory_path.to_string_lossy().to_string(); + configure_audit_rsyslog(log_directory_path.clone(), "hipaa", &remote_endpoint)?; + + // Launch a background task to clean up the audit logs + launch_pgaudit_gc(log_directory_path); } // Launch remaining service threads diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index 7aa7360f9d..e4acc5471c 100644 --- a/compute_tools/src/config.rs +++ b/compute_tools/src/config.rs @@ -167,7 +167,8 @@ pub fn write_postgres_conf( writeln!(file, "# Managed by compute_ctl audit settings: begin")?; // This log level is very verbose // but this is necessary for HIPAA compliance. - writeln!(file, "pgaudit.log='all'")?; + // Exclude 'misc' category, because it doesn't contain anythig relevant. + writeln!(file, "pgaudit.log='all, -misc'")?; writeln!(file, "pgaudit.log_parameter=on")?; // Disable logging of catalog queries // The catalog doesn't contain sensitive data, so we don't need to audit it. 
diff --git a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf index bef3c36446..1937cdc292 100644 --- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf +++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf @@ -4,7 +4,8 @@ module(load="imfile") # Input configuration for log files in the specified directory # Replace {log_directory} with the directory containing the log files input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Facility="local0") -global(workDirectory="/var/log") +# the directory to store rsyslog state files +global(workDirectory="/var/log/rsyslog") # Forward logs to remote syslog server *.* @@{remote_endpoint} \ No newline at end of file diff --git a/compute_tools/src/metrics.rs b/compute_tools/src/metrics.rs index dab32d5dc1..4caa48307e 100644 --- a/compute_tools/src/metrics.rs +++ b/compute_tools/src/metrics.rs @@ -1,6 +1,8 @@ -use metrics::core::Collector; +use metrics::core::{AtomicF64, Collector, GenericGauge}; use metrics::proto::MetricFamily; -use metrics::{IntCounterVec, UIntGaugeVec, register_int_counter_vec, register_uint_gauge_vec}; +use metrics::{ + IntCounterVec, UIntGaugeVec, register_gauge, register_int_counter_vec, register_uint_gauge_vec, +}; use once_cell::sync::Lazy; pub(crate) static INSTALLED_EXTENSIONS: Lazy = Lazy::new(|| { @@ -59,10 +61,20 @@ pub(crate) static REMOTE_EXT_REQUESTS_TOTAL: Lazy = Lazy::new(|| .expect("failed to define a metric") }); +// Size of audit log directory in bytes +pub(crate) static AUDIT_LOG_DIR_SIZE: Lazy> = Lazy::new(|| { + register_gauge!( + "compute_audit_log_dir_size", + "Size of audit log directory in bytes", + ) + .expect("failed to define a metric") +}); + pub fn collect() -> Vec { let mut metrics = INSTALLED_EXTENSIONS.collect(); metrics.extend(CPLANE_REQUESTS_TOTAL.collect()); metrics.extend(REMOTE_EXT_REQUESTS_TOTAL.collect()); 
metrics.extend(DB_MIGRATION_FAILED.collect()); + metrics.extend(AUDIT_LOG_DIR_SIZE.collect()); metrics } diff --git a/compute_tools/src/rsyslog.rs b/compute_tools/src/rsyslog.rs index c8fba4fdcd..7537fafaa5 100644 --- a/compute_tools/src/rsyslog.rs +++ b/compute_tools/src/rsyslog.rs @@ -1,8 +1,11 @@ +use std::fs; +use std::path::Path; use std::process::Command; +use std::time::Duration; use std::{fs::OpenOptions, io::Write}; use anyhow::{Context, Result}; -use tracing::info; +use tracing::{error, info, instrument, warn}; fn get_rsyslog_pid() -> Option { let output = Command::new("pgrep") @@ -43,7 +46,7 @@ fn restart_rsyslog() -> Result<()> { } pub fn configure_audit_rsyslog( - log_directory: &str, + log_directory: String, tag: &str, remote_endpoint: &str, ) -> Result<()> { @@ -75,3 +78,61 @@ pub fn configure_audit_rsyslog( Ok(()) } + +#[instrument(skip_all)] +async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> { + info!("running pgaudit GC main loop"); + loop { + // Check log_directory for old pgaudit logs and delete them. + // New log files are checked every 5 minutes, as set in pgaudit.log_rotation_age + // Find files that were not modified in the last 15 minutes and delete them. + // This should be enough time for rsyslog to process the logs and for us to catch the alerts. + // + // In case of a very high load, we might need to adjust this value and pgaudit.log_rotation_age. + // + // TODO: add some smarter logic to delete the files that are fully streamed according to rsyslog + // imfile-state files, but for now just do a simple GC to avoid filling up the disk. + let _ = Command::new("find") + .arg(&log_directory) + .arg("-name") + .arg("audit*.log") + .arg("-mmin") + .arg("+15") + .arg("-delete") + .output()?; + + // also collect the metric for the size of the log directory + async fn get_log_files_size(path: &Path) -> Result { + let mut total_size = 0; + + for entry in fs::read_dir(path)? 
{ + let entry = entry?; + let entry_path = entry.path(); + + if entry_path.is_file() && entry_path.to_string_lossy().ends_with("log") { + total_size += entry.metadata()?.len(); + } + } + + Ok(total_size) + } + + let log_directory_size = get_log_files_size(Path::new(&log_directory)) + .await + .unwrap_or_else(|e| { + warn!("Failed to get log directory size: {}", e); + 0 + }); + crate::metrics::AUDIT_LOG_DIR_SIZE.set(log_directory_size as f64); + tokio::time::sleep(Duration::from_secs(60)).await; + } +} + +// launch pgaudit GC thread to clean up the old pgaudit logs stored in the log_directory +pub fn launch_pgaudit_gc(log_directory: String) { + tokio::spawn(async move { + if let Err(e) = pgaudit_gc_main_loop(log_directory).await { + error!("pgaudit GC main loop failed: {}", e); + } + }); +} From 4a97cd0b7e95d88ef23ea235700326db39d6e0cb Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Fri, 14 Mar 2025 17:02:55 +0000 Subject: [PATCH 196/207] test_runner: fix tests with jsonnet for Python 3.13 (#11240) ## Problem Python's `jsonnet` 0.20.0 doesn't support Python 3.13, so we have a couple of tests xfailed because of that. 
## Summary of changes - Bump `jsonnet` to `0.21.0rc2` which supports Python 3.13 - Unxfail `test_sql_exporter_metrics_e2e` and `test_sql_exporter_metrics_smoke` on Python 3.13 --- poetry.lock | 34 ++++++++++++++++++--- pyproject.toml | 4 +-- test_runner/regress/test_compute_metrics.py | 9 ++---- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/poetry.lock b/poetry.lock index 03aa543b06..7c84b2969b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1491,14 +1491,38 @@ files = [ [[package]] name = "jsonnet" -version = "0.20.0" -description = "Python bindings for Jsonnet - The data templating language" +version = "0.21.0rc2" +description = "Python bindings for Jsonnet - The data templating language " optional = false python-versions = "*" groups = ["main"] -markers = "python_version < \"3.13\"" files = [ - {file = "jsonnet-0.20.0.tar.gz", hash = "sha256:7e770c7bf3a366b97b650a39430450f77612e74406731eb75c5bd59f3f104d4f"}, + {file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8779ac6820fee44ef736df2baedc3ae93e8cd5d672ee105015c2a47fe627a727"}, + {file = "jsonnet-0.21.0rc2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:99affe8c71e2551465064a8039bb3d1cba27a0b73b2b9ff1b652e06f17d4ea8b"}, + {file = "jsonnet-0.21.0rc2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a9dffb9aa01013d100ddfb7230d1eeb80f2a8eef712b1825a60cad57106d8bd"}, + {file = "jsonnet-0.21.0rc2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cca6c95f2879dcab52650b7aa09a4e82a139b084931b1f6f8c840f834fecc08a"}, + {file = "jsonnet-0.21.0rc2-cp310-cp310-win_amd64.whl", hash = "sha256:016d6afdb302a6d00bf3bce6a0c3d9c093b992e33f9bc67c64a868035892258e"}, + {file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e893ab2c9bf10d8ec9e9b0cee8961879c88d0619cc6d8f75ea284a78e06ae32b"}, + {file = "jsonnet-0.21.0rc2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06b353cd3daa2781e6cd308e05f2f116396376994bcb5f59aaadbc6a752c7f2"}, 
+ {file = "jsonnet-0.21.0rc2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9eb2bc8e62b73101329072da322f7e2a1bdb3ac530b94669128d1b480e311e55"}, + {file = "jsonnet-0.21.0rc2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:113766fd0c25620807bcf04d4c739f461c971a4f0e4aece9ba62b4e762de9598"}, + {file = "jsonnet-0.21.0rc2-cp311-cp311-win_amd64.whl", hash = "sha256:8dab208c2c2760be60f87d1ceb8b28c86b51ed0e31129a7d90cd5fe890b41225"}, + {file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:95f5b9dd26a41d6f258d1baa8d22e557051beeed8c52a6202584f1becca9dcb5"}, + {file = "jsonnet-0.21.0rc2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cecc6d76e2b377260fae0a060097c113e6ac361b8f739903ea7f3f5f64cdebdf"}, + {file = "jsonnet-0.21.0rc2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aaa2d18224af7e63872ef4a101e93962505456cf5f5439c3cfc25dad6845f8b1"}, + {file = "jsonnet-0.21.0rc2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:2a9063f811554487ed552445e964aeec969cafb266b965029c8d6b091ce47950"}, + {file = "jsonnet-0.21.0rc2-cp312-cp312-win_amd64.whl", hash = "sha256:80d171182c169761f744ba50068a4ad35d48e52b91d25bf4c7bb9a72f0a04f71"}, + {file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f3657938f87cb6bc6da20ca631d437b5faf469ca060a7c7def9c8fd2f25a5e06"}, + {file = "jsonnet-0.21.0rc2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3dcebc30cb991b58bc416ee05e9387004d04716d5c0b89714ff042bd069af5c8"}, + {file = "jsonnet-0.21.0rc2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ac52c95482df3ed93c908468ca2f40d4825b6baba284b395ddc47bd663b8c3a"}, + {file = "jsonnet-0.21.0rc2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8b34450823a7a1861de892fef9f29de1b4c19e1a79e27d81ffe7e57646cc89d6"}, + {file = "jsonnet-0.21.0rc2-cp313-cp313-win_amd64.whl", hash = "sha256:573fd2580e46f4875ec505f1732f9e804b7063cba790342ed6fdafe9a6b30556"}, + {file = 
"jsonnet-0.21.0rc2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:871ca1411de3626499bda60b330d37f85a592918f99ba4809089bbb8d4f5bfe4"}, + {file = "jsonnet-0.21.0rc2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5d33b25a9c5bf9099100b9b16cb385a2876d891fbe639ee9d476fc75c861903a"}, + {file = "jsonnet-0.21.0rc2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2bac374565c7f89a4675f19fd2b624ed1376519267f4e444f49b6fc0368f6e5"}, + {file = "jsonnet-0.21.0rc2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:fab7bbd88f9159f88a7350701a97bda24de9e3b9eef14c2501ba8b9224160d60"}, + {file = "jsonnet-0.21.0rc2-cp39-cp39-win_amd64.whl", hash = "sha256:ed71ffba0fd233a1bca7b0f7be79730792c5383e562a9dc7da152478d9ee1612"}, + {file = "jsonnet-0.21.0rc2.tar.gz", hash = "sha256:2b83ec4b5a771c3732e0972be23a71f042ad2940db6918d3a52aade69bc394fb"}, ] [[package]] @@ -3820,4 +3844,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.11" -content-hash = "010ffce959bb256880ab5a267048c182e4612b3151f9a94e3bf5d3a7807962fe" +content-hash = "715fc8c896dcfa1b15054deeddcdec557ef93af91b26e1c8e4688fe4dbef5296" diff --git a/pyproject.toml b/pyproject.toml index e7f5c62bd0..e009b0773e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,8 +48,8 @@ types-jwcrypto = "^1.5.0.20240925" pyyaml = "^6.0.2" types-pyyaml = "^6.0.12.20240917" testcontainers = "^4.9.0" -# Jsonnet doesn't support Python 3.13 yet -jsonnet = { version = "^0.20.0", markers = "python_version < '3.13'" } +# Install a release candidate of `jsonnet`, as it supports Python 3.13 +jsonnet = "^0.21.0-rc2" [tool.poetry.group.dev.dependencies] mypy = "==1.13.0" diff --git a/test_runner/regress/test_compute_metrics.py b/test_runner/regress/test_compute_metrics.py index b360162dc1..85cd065a2f 100644 --- a/test_runner/regress/test_compute_metrics.py +++ b/test_runner/regress/test_compute_metrics.py @@ -3,12 +3,13 @@ from __future__ import annotations import enum import os import shutil 
-import sys from enum import StrEnum from logging import debug from pathlib import Path from typing import TYPE_CHECKING, cast +# Docs are available at https://jsonnet.org/ref/bindings.html#python_api +import _jsonnet import pytest import requests import yaml @@ -92,10 +93,6 @@ def jsonnet_evaluate_file( ext_vars: str | dict[str, str] | None = None, tla_vars: str | dict[str, str] | None = None, ) -> str: - # Jsonnet doesn't support Python 3.13 yet - # Docs are available at https://jsonnet.org/ref/bindings.html#python_api - import _jsonnet - return cast( "str", _jsonnet.evaluate_file( @@ -130,7 +127,6 @@ class SqlExporterProcess(StrEnum): AUTOSCALING = "autoscaling" -@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "collector_name", ["neon_collector", "neon_collector_autoscaling"], @@ -359,7 +355,6 @@ else: self.__proc.wait() -@pytest.mark.xfail(sys.version_info >= (3, 13), reason="Jsonnet doesn't support Python 3.13 yet") @pytest.mark.parametrize( "exporter", [SqlExporterProcess.COMPUTE, SqlExporterProcess.AUTOSCALING], From 3168bd0e3ac916e62d2032d2e49cbbd1f49e474a Mon Sep 17 00:00:00 2001 From: Dmitrii Kovalkov <34828390+DimasKovas@users.noreply.github.com> Date: Fri, 14 Mar 2025 21:42:09 +0400 Subject: [PATCH 197/207] tests: suppress "Cancelled request finished with an error" in test_timeline_archive (#11241) ## Problem Previous PR https://github.com/neondatabase/neon/pull/11190 didn't suppress `Cancelled request finished with an error` messages, which are also expected, so the test https://github.com/neondatabase/neon/issues/11177 is still flaky. 
## Summary of changes - Suppress `Cancelled request finished with an error` in `test_timeline_archive` --- test_runner/regress/test_timeline_archive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test_runner/regress/test_timeline_archive.py b/test_runner/regress/test_timeline_archive.py index 11567cafd0..2bad0bb671 100644 --- a/test_runner/regress/test_timeline_archive.py +++ b/test_runner/regress/test_timeline_archive.py @@ -46,8 +46,11 @@ def test_timeline_archive(neon_env_builder: NeonEnvBuilder, shard_count: int): # We make /archival_config requests that are intended to fail. # It's expected that storcon drops requests to other pageservers after # it gets the first error (https://github.com/neondatabase/neon/issues/11177) - ps.allowed_errors.append( - ".*WARN.* path=/v1/tenant/.*/archival_config .*request was dropped before completing", + ps.allowed_errors.extend( + [ + ".*WARN.* path=/v1/tenant/.*/archival_config .*request was dropped before completing", + ".*ERROR.* path=/v1/tenant/.*/archival_config .*Cancelled request finished with an error.*", + ] ) # first try to archive a non existing timeline for an existing tenant: From 53d50c7ea574c3e53827fa83614eeb7479d9ee5c Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 14 Mar 2025 18:45:18 +0100 Subject: [PATCH 198/207] pageserver: deflake compaction tests (#11246) These need to set `NoYield`, otherwise they may be preempted by pending L0 compaction. 
--- pageserver/src/tenant.rs | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 7a06d60268..55b5704d67 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -6559,7 +6559,11 @@ mod tests { tline.freeze_and_flush().await?; tline - .compact(&CancellationToken::new(), EnumSet::empty(), &ctx) + .compact( + &CancellationToken::new(), + CompactFlags::NoYield.into(), + &ctx, + ) .await?; let mut writer = tline.writer().await; @@ -6576,7 +6580,11 @@ mod tests { tline.freeze_and_flush().await?; tline - .compact(&CancellationToken::new(), EnumSet::empty(), &ctx) + .compact( + &CancellationToken::new(), + CompactFlags::NoYield.into(), + &ctx, + ) .await?; let mut writer = tline.writer().await; @@ -6593,7 +6601,11 @@ mod tests { tline.freeze_and_flush().await?; tline - .compact(&CancellationToken::new(), EnumSet::empty(), &ctx) + .compact( + &CancellationToken::new(), + CompactFlags::NoYield.into(), + &ctx, + ) .await?; let mut writer = tline.writer().await; @@ -6610,7 +6622,11 @@ mod tests { tline.freeze_and_flush().await?; tline - .compact(&CancellationToken::new(), EnumSet::empty(), &ctx) + .compact( + &CancellationToken::new(), + CompactFlags::NoYield.into(), + &ctx, + ) .await?; assert_eq!( @@ -6693,7 +6709,9 @@ mod tests { timeline.freeze_and_flush().await?; if compact { // this requires timeline to be &Arc - timeline.compact(&cancel, EnumSet::empty(), ctx).await?; + timeline + .compact(&cancel, CompactFlags::NoYield.into(), ctx) + .await?; } // this doesn't really need to use the timeline_id target, but it is closer to what it @@ -7399,7 +7417,9 @@ mod tests { // Perform a cycle of flush, compact, and GC tline.freeze_and_flush().await?; - tline.compact(&cancel, EnumSet::empty(), &ctx).await?; + tline + .compact(&cancel, CompactFlags::NoYield.into(), &ctx) + .await?; tenant .gc_iteration(Some(tline.timeline_id), 0, Duration::ZERO, &cancel, 
&ctx) .await?; @@ -7779,7 +7799,9 @@ mod tests { let before_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len(); - tline.compact(&cancel, EnumSet::empty(), &ctx).await?; + tline + .compact(&cancel, CompactFlags::NoYield.into(), &ctx) + .await?; let after_num_l0_delta_files = tline.layers.read().await.layer_map()?.level0_deltas().len(); From a674ed8cafe4d378d43938285da866cfa6255082 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 14 Mar 2025 20:08:24 +0000 Subject: [PATCH 199/207] storcon: safety check when completing shard split (#11256) ## Problem There is a rare race between controller graceful deployment and shard splitting where we may incorrectly both abort _and_ complete the split (on different pods), and thereby leave no shards at all in the database. Related: #11254 ## Summary of changes - In complete_shard_split, refuse to delete anything if child shards are not found --- storage_controller/src/persistence.rs | 16 ++++++++++++++++ storage_controller/src/service.rs | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index 4a97aac125..85d9c574a1 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -967,10 +967,26 @@ impl Persistence { &self, split_tenant_id: TenantId, old_shard_count: ShardCount, + new_shard_count: ShardCount, ) -> DatabaseResult<()> { use crate::schema::tenant_shards::dsl::*; self.with_measured_conn(DatabaseOperation::CompleteShardSplit, move |conn| { Box::pin(async move { + // Sanity: child shards must still exist, as we're deleting parent shards + let child_shards_query = tenant_shards + .filter(tenant_id.eq(split_tenant_id.to_string())) + .filter(shard_count.eq(new_shard_count.literal() as i32)); + let child_shards = child_shards_query + .load::(conn) + .await?; + if child_shards.len() != new_shard_count.count() as usize { + return Err(DatabaseError::Logical(format!( 
+ "Unexpected child shard count {} while completing split to \ + count {new_shard_count:?} on tenant {split_tenant_id}", + child_shards.len() + ))); + } + // Drop parent shards diesel::delete(tenant_shards) .filter(tenant_id.eq(split_tenant_id.to_string())) diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index f33408a89b..4e00136e1b 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -5753,7 +5753,7 @@ impl Service { // it doesn't match, but that requires more retry logic on this side) self.persistence - .complete_shard_split(tenant_id, old_shard_count) + .complete_shard_split(tenant_id, old_shard_count, new_shard_count) .await?; fail::fail_point!("shard-split-post-complete", |_| Err( From a5b00b87ba5bc41b9dd16776a72d3bd614c95a35 Mon Sep 17 00:00:00 2001 From: Cihan Demirci <128653800+fcdm@users.noreply.github.com> Date: Sun, 16 Mar 2025 14:53:27 +0100 Subject: [PATCH 200/207] CI(pre-merge-checks): use step-security/changed-files (#11265) Use Step Security maintained version of `tj-actions/changed-files`. 
https://www.stepsecurity.io/blog/harden-runner-detection-tj-actions-changed-files-action-is-compromised#use-the-stepsecurity-maintained-changed-files-action --- .github/workflows/pre-merge-checks.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index 1e81550314..9e5c3df52c 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -27,7 +27,7 @@ jobs: steps: - uses: actions/checkout@v4 - - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4 + - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 id: python-src with: files: | @@ -38,7 +38,7 @@ jobs: poetry.lock pyproject.toml - - uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf # v45.0.4 + - uses: step-security/changed-files@3dbe17c78367e7d60f00d78ae6781a35be47b4a1 # v45.0.1 id: rust-src with: files: | From 228bb753546149135ddb7f4c198d8c8d28fa1c6e Mon Sep 17 00:00:00 2001 From: Peter Bendel Date: Sun, 16 Mar 2025 15:04:48 +0100 Subject: [PATCH 201/207] Extend large tenant OLTP workload ... (#11166) ... to better match the workload characteristics of real Neon customers ## Problem We analyzed workloads of large Neon users and want to extend the oltp workload to include characteristics seen in those workloads. 
## Summary of changes - for re-use branch delete inserted rows from last run - adjust expected run-time (time-outs) in GitHub workflow - add queries that exposes the prefetch getpages path - add I/U/D transactions for another table (so far the workload was insert/append-only) - add an explicit vacuum analyze step and measure its time - add reindex concurrently step and measure its time (and take care that this step succeeds even if prior reindex runs have failed or were canceled) - create a second connection string for the pooled connection that removes the `-pooler` suffix from the hostname because we want to run long-running statements (database maintenance) and bypass the pooler which doesn't support unlimited statement timeout ## Test run https://github.com/neondatabase/neon/actions/runs/13851772887/job/38760172415 --- .github/workflows/large_oltp_benchmark.yml | 89 +++++++--- .../IUD_one_transaction.sql | 162 ++++++++++++++++++ .../select_prefetch_webhook.sql | 25 +++ .../test_perf_oltp_large_tenant.py | 78 ++++++++- 4 files changed, 327 insertions(+), 27 deletions(-) create mode 100644 test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql create mode 100644 test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql diff --git a/.github/workflows/large_oltp_benchmark.yml b/.github/workflows/large_oltp_benchmark.yml index f33e11cd08..a7c3118e34 100644 --- a/.github/workflows/large_oltp_benchmark.yml +++ b/.github/workflows/large_oltp_benchmark.yml @@ -2,8 +2,8 @@ name: large oltp benchmark on: # uncomment to run on push for debugging your PR - push: - branches: [ bodobolero/synthetic_oltp_workload ] + #push: + # branches: [ bodobolero/synthetic_oltp_workload ] schedule: # * is a special character in YAML so you have to quote this string @@ -12,7 +12,7 @@ on: # │ │ ┌───────────── day of the month (1 - 31) # │ │ │ ┌───────────── month (1 - 12 or JAN-DEC) # │ │ │ │ ┌───────────── day of the week (0 - 6 or SUN-SAT) - - cron: '0 15 * * 
*' # run once a day, timezone is utc, avoid conflict with other benchmarks + - cron: '0 15 * * 0,2,4' # run on Sunday, Tuesday, Thursday at 3 PM UTC workflow_dispatch: # adds ability to run this manually defaults: @@ -22,7 +22,7 @@ defaults: concurrency: # Allow only one workflow globally because we need dedicated resources which only exist once group: large-oltp-bench-workflow - cancel-in-progress: true + cancel-in-progress: false jobs: oltp: @@ -31,9 +31,9 @@ jobs: matrix: include: - target: new_branch - custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 + custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100 - target: reuse_branch - custom_scripts: insert_webhooks.sql@2 select_any_webhook_with_skew.sql@4 select_recent_webhook.sql@4 + custom_scripts: insert_webhooks.sql@200 select_any_webhook_with_skew.sql@300 select_recent_webhook.sql@397 select_prefetch_webhook.sql@3 IUD_one_transaction.sql@100 max-parallel: 1 # we want to run each stripe size sequentially to be able to compare the results permissions: contents: write @@ -46,7 +46,6 @@ jobs: PG_VERSION: 16 # pre-determined by pre-determined project TEST_OUTPUT: /tmp/test_output BUILD_TYPE: remote - SAVE_PERF_REPORT: ${{ github.ref_name == 'main' }} PLATFORM: ${{ matrix.target }} runs-on: [ self-hosted, us-east-2, x64 ] @@ -57,8 +56,10 @@ jobs: password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} options: --init - # Increase timeout to 8h, default timeout is 6h - timeout-minutes: 480 + # Increase timeout to 2 days, default timeout is 6h - database maintenance can take a long time + # (normally 1h pgbench, 3h vacuum analyze 3.5h re-index) x 2 = 15h, leave some buffer for regressions + # in one run vacuum didn't finish within 12 hours + timeout-minutes: 2880 steps: - uses: actions/checkout@v4 @@ -89,29 +90,45 @@ jobs: - name: Set up Connection String id: 
set-up-connstr run: | - case "${{ matrix.target }}" in - new_branch) - CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} - ;; - reuse_branch) - CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} - ;; - *) - echo >&2 "Unknown target=${{ matrix.target }}" - exit 1 - ;; - esac + case "${{ matrix.target }}" in + new_branch) + CONNSTR=${{ steps.create-neon-branch-oltp-target.outputs.dsn }} + ;; + reuse_branch) + CONNSTR=${{ secrets.BENCHMARK_LARGE_OLTP_REUSE_CONNSTR }} + ;; + *) + echo >&2 "Unknown target=${{ matrix.target }}" + exit 1 + ;; + esac - echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + CONNSTR_WITHOUT_POOLER="${CONNSTR//-pooler/}" - - name: Benchmark pgbench with custom-scripts + echo "connstr=${CONNSTR}" >> $GITHUB_OUTPUT + echo "connstr_without_pooler=${CONNSTR_WITHOUT_POOLER}" >> $GITHUB_OUTPUT + + - name: Delete rows from prior runs in reuse branch + if: ${{ matrix.target == 'reuse_branch' }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} + PG_CONFIG: /tmp/neon/pg_install/v16/bin/pg_config + PSQL: /tmp/neon/pg_install/v16/bin/psql + PG_16_LIB_PATH: /tmp/neon/pg_install/v16/lib + run: | + echo "$(date '+%Y-%m-%d %H:%M:%S') - Deleting rows in table webhook.incoming_webhooks from prior runs" + export LD_LIBRARY_PATH=${PG_16_LIB_PATH} + ${PSQL} "${BENCHMARK_CONNSTR}" -c "SET statement_timeout = 0; DELETE FROM webhook.incoming_webhooks WHERE created_at > '2025-02-27 23:59:59+00';" + echo "$(date '+%Y-%m-%d %H:%M:%S') - Finished deleting rows in table webhook.incoming_webhooks from prior runs" + + - name: Benchmark pgbench with custom-scripts uses: ./.github/actions/run-python-test-set with: build_type: ${{ env.BUILD_TYPE }} test_selection: performance run_in_parallel: false - save_perf_report: ${{ env.SAVE_PERF_REPORT }} - extra_params: -m remote_cluster --timeout 21600 -k test_perf_oltp_large_tenant + save_perf_report: true + extra_params: -m remote_cluster --timeout 7200 -k 
test_perf_oltp_large_tenant_pgbench pg_version: ${{ env.PG_VERSION }} aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} env: @@ -119,6 +136,21 @@ jobs: VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Benchmark database maintenance + uses: ./.github/actions/run-python-test-set + with: + build_type: ${{ env.BUILD_TYPE }} + test_selection: performance + run_in_parallel: false + save_perf_report: true + extra_params: -m remote_cluster --timeout 172800 -k test_perf_oltp_large_tenant_maintenance + pg_version: ${{ env.PG_VERSION }} + aws-oicd-role-arn: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + env: + BENCHMARK_CONNSTR: ${{ steps.set-up-connstr.outputs.connstr_without_pooler }} + VIP_VAP_ACCESS_TOKEN: "${{ secrets.VIP_VAP_ACCESS_TOKEN }}" + PERF_TEST_RESULT_CONNSTR: "${{ secrets.PERF_TEST_RESULT_CONNSTR }}" + - name: Delete Neon Branch for large tenant if: ${{ always() && matrix.target == 'new_branch' }} uses: ./.github/actions/neon-branch-delete @@ -127,6 +159,13 @@ jobs: branch_id: ${{ steps.create-neon-branch-oltp-target.outputs.branch_id }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} + - name: Configure AWS credentials # again because prior steps could have exceeded 5 hours + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-region: eu-central-1 + role-to-assume: ${{ vars.DEV_AWS_OIDC_ROLE_ARN }} + role-duration-seconds: 18000 # 5 hours + - name: Create Allure report id: create-allure-report if: ${{ !cancelled() }} diff --git a/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql new file mode 100644 index 0000000000..4c5b3fbd11 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/IUD_one_transaction.sql @@ -0,0 +1,162 @@ +\set min_id 1 +\set max_id 1500000000 +\set range_size 100 + +-- Use uniform random instead of random_zipfian +\set random_id random(:min_id, :max_id) 
+\set random_mar_id random(1, 65536) +\set random_delete_id random(:min_id, :max_id) + +-- Update exactly one row (if it exists) using the uniformly chosen random_id +UPDATE transaction.transaction + SET state = 'COMPLETED', + settlement_date = CURRENT_DATE, + mar_identifier = (:random_mar_id)::int + WHERE id = (:random_id)::bigint; + +-- Insert exactly one row +INSERT INTO transaction.transaction ( + user_id, + card_id, + business_id, + preceding_transaction_id, + is_last, + is_mocked, + type, + state, + network, + subnetwork, + user_transaction_time, + settlement_date, + request_amount, + amount, + currency_code, + approval_code, + response, + gpa, + gpa_order_unload, + gpa_order, + program_transfer, + fee_transfer, + peer_transfer, + msa_orders, + risk_assessment, + auto_reload, + direct_deposit, + polarity, + real_time_fee_group, + fee, + chargeback, + standin_approved_by, + acquirer_fee_amount, + funded_account_holder, + digital_wallet_token, + network_fees, + card_security_code_verification, + fraud, + cardholder_authentication_data, + currency_conversion, + merchant, + store, + card_acceptor, + acquirer, + pos, + avs, + mar_token, + mar_preceding_related_transaction_token, + mar_business_token, + mar_acting_user_token, + mar_card_token, + mar_duration, + mar_created_time, + issuer_interchange_amount, + offer_orders, + transaction_canonical_id, + mar_identifier, + created_at, + card_acceptor_mid, + card_acceptor_name, + address_verification, + issuing_product, + mar_enhanced_data_token, + standin_reason +) +SELECT + (:random_id % 100000) + 1 AS user_id, + (:random_id % 500000) + 1 AS card_id, + (:random_id % 20000) + 1 AS business_id, + NULL AS preceding_transaction_id, + (:random_id % 2) = 0 AS is_last, + (:random_id % 5) = 0 AS is_mocked, + 'authorization' AS type, + 'PENDING' AS state, + 'VISA' AS network, + 'VISANET' AS subnetwork, + now() - ((:random_id % 100) || ' days')::interval AS user_transaction_time, + now() - ((:random_id % 100) || ' 
days')::interval AS settlement_date, + random() * 1000 AS request_amount, + random() * 1000 AS amount, + 'USD' AS currency_code, + md5((:random_id)::text) AS approval_code, + '{}'::jsonb AS response, + '{}'::jsonb AS gpa, + '{}'::jsonb AS gpa_order_unload, + '{}'::jsonb AS gpa_order, + '{}'::jsonb AS program_transfer, + '{}'::jsonb AS fee_transfer, + '{}'::jsonb AS peer_transfer, + '{}'::jsonb AS msa_orders, + '{}'::jsonb AS risk_assessment, + '{}'::jsonb AS auto_reload, + '{}'::jsonb AS direct_deposit, + '{}'::jsonb AS polarity, + '{}'::jsonb AS real_time_fee_group, + '{}'::jsonb AS fee, + '{}'::jsonb AS chargeback, + NULL AS standin_approved_by, + random() * 100 AS acquirer_fee_amount, + '{}'::jsonb AS funded_account_holder, + '{}'::jsonb AS digital_wallet_token, + '{}'::jsonb AS network_fees, + '{}'::jsonb AS card_security_code_verification, + '{}'::jsonb AS fraud, + '{}'::jsonb AS cardholder_authentication_data, + '{}'::jsonb AS currency_conversion, + '{}'::jsonb AS merchant, + '{}'::jsonb AS store, + '{}'::jsonb AS card_acceptor, + '{}'::jsonb AS acquirer, + '{}'::jsonb AS pos, + '{}'::jsonb AS avs, + md5((:random_id)::text || 'token') AS mar_token, + NULL AS mar_preceding_related_transaction_token, + NULL AS mar_business_token, + NULL AS mar_acting_user_token, + NULL AS mar_card_token, + random() * 1000 AS mar_duration, + now() AS mar_created_time, + random() * 100 AS issuer_interchange_amount, + '{}'::jsonb AS offer_orders, + (:random_id % 500) + 1 AS transaction_canonical_id, + :random_id::integer AS mar_identifier, + now() AS created_at, + NULL AS card_acceptor_mid, + NULL AS card_acceptor_name, + '{}'::jsonb AS address_verification, + 'DEFAULT_PRODUCT' AS issuing_product, + NULL AS mar_enhanced_data_token, + NULL AS standin_reason +FROM (SELECT 1) AS dummy; + +-- Delete exactly one row using the uniformly chosen random_delete_id +WITH to_delete AS ( + SELECT id + FROM transaction.transaction + WHERE id >= (:random_delete_id)::bigint + AND id < 
((:random_delete_id)::bigint + :range_size) + ORDER BY id + LIMIT 1 +) +DELETE FROM transaction.transaction +USING to_delete +WHERE transaction.transaction.id = to_delete.id; \ No newline at end of file diff --git a/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql new file mode 100644 index 0000000000..e0b0e52276 --- /dev/null +++ b/test_runner/performance/large_synthetic_oltp/select_prefetch_webhook.sql @@ -0,0 +1,25 @@ +-- enforce a controlled number of getpages prefetch requests from a range of +-- 40 million first pages (320 GB) of a 500 GiB table +-- the table has 55 million pages + + +-- Zipfian distributions model real-world access patterns where: +-- A few values (popular IDs) are accessed frequently. +-- Many values are accessed rarely. +-- This is useful for simulating realistic workloads + +\set alpha 1.2 +\set min_page 1 +\set max_page 40000000 + +\set zipf_random_page random_zipfian(:min_page, :max_page, :alpha) + +-- Read 500 consecutive pages from a Zipfian-distributed random start page +-- This enforces PostgreSQL prefetching +WITH random_page AS ( + SELECT :zipf_random_page::int AS start_page +) +SELECT MAX(created_at) +FROM webhook.incoming_webhooks +WHERE ctid >= (SELECT format('(%s,1)', start_page)::tid FROM random_page) +AND ctid < (SELECT format('(%s,1)', start_page + 500)::tid FROM random_page); \ No newline at end of file diff --git a/test_runner/performance/test_perf_oltp_large_tenant.py b/test_runner/performance/test_perf_oltp_large_tenant.py index ae00dbb3b5..842e6a904b 100644 --- a/test_runner/performance/test_perf_oltp_large_tenant.py +++ b/test_runner/performance/test_perf_oltp_large_tenant.py @@ -2,11 +2,13 @@ from __future__ import annotations import os import timeit +from contextlib import closing from pathlib import Path import pytest from fixtures.benchmark_fixture import PgBenchRunResult from fixtures.compare_fixtures import 
PgCompare +from fixtures.log_helper import log from performance.test_perf_pgbench import get_durations_matrix, utc_now_timestamp @@ -82,9 +84,81 @@ def run_pgbench(env: PgCompare, prefix: str, cmdline, password: None): env.zenbenchmark.record_pg_bench_result(prefix, res) +def run_database_maintenance(env: PgCompare): + with closing(env.pg.connect()) as conn: + with conn.cursor() as cur: + log.info("start vacuum analyze transaction.transaction") + with env.zenbenchmark.record_duration("vacuum_analyze"): + cur.execute("SET statement_timeout = 0;") + cur.execute("SET max_parallel_maintenance_workers = 7;") + cur.execute("SET maintenance_work_mem = '10GB';") + cur.execute("vacuum analyze transaction.transaction;") + log.info("finished vacuum analyze transaction.transaction") + + # recover previously failed or canceled re-indexing + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'transaction' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + LOOP + EXECUTE 'DROP INDEX IF EXISTS transaction.' || invalid_index; + END LOOP; + END $$; + """ + ) + # also recover failed or canceled re-indexing on toast part of table + cur.execute( + """ + DO $$ + DECLARE + invalid_index TEXT; + BEGIN + FOR invalid_index IN + SELECT c.relname + FROM pg_class c + JOIN pg_index i ON i.indexrelid = c.oid + JOIN pg_namespace n ON n.oid = c.relnamespace + WHERE n.nspname = 'pg_toast' + AND i.indisvalid = FALSE + AND c.relname LIKE '%_ccnew%' + AND i.indrelid = ( + SELECT reltoastrelid FROM pg_class + WHERE relname = 'transaction' + AND relnamespace = (SELECT oid FROM pg_namespace WHERE nspname = 'transaction') + ) + LOOP + EXECUTE 'DROP INDEX IF EXISTS pg_toast.' 
|| invalid_index; + END LOOP; + END $$; + """ + ) + + log.info("start REINDEX TABLE CONCURRENTLY transaction.transaction") + with env.zenbenchmark.record_duration("reindex concurrently"): + cur.execute("REINDEX TABLE CONCURRENTLY transaction.transaction;") + log.info("finished REINDEX TABLE CONCURRENTLY transaction.transaction") + + @pytest.mark.parametrize("custom_scripts", get_custom_scripts()) @pytest.mark.parametrize("duration", get_durations_matrix()) @pytest.mark.remote_cluster -def test_perf_oltp_large_tenant(remote_compare: PgCompare, custom_scripts: str, duration: int): +def test_perf_oltp_large_tenant_pgbench( + remote_compare: PgCompare, custom_scripts: str, duration: int +): run_test_pgbench(remote_compare, custom_scripts, duration) - # todo: run re-index, analyze, vacuum, etc. after the test and measure and report its duration + + +@pytest.mark.remote_cluster +def test_perf_oltp_large_tenant_maintenance(remote_compare: PgCompare): + # run analyze, vacuum, re-index after the test and measure and report its duration + run_database_maintenance(remote_compare) From 8566cad23b576fcac7b055b88a33acc74335323d Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Sun, 16 Mar 2025 18:17:58 +0100 Subject: [PATCH 202/207] chore(docs): Refresh RFC guide to suggest using YYYY-MM-DD prefix (#11252) ## Problem Serial/numeric IDs lead to collisions, which is not critical but looks awkward. Previous discussion: https://neondb.slack.com/archives/C033A2WE6BZ/p1741891345869979 ## Summary of changes Suggest using the `YYYY-MM-DD` prefix, which i) has less chance of collision; ii) provides out-of-the-box lexicographic sorting; iii) even if it collides, it's not a big deal -- just two RFCs have been started on the same day. 
--------- Co-authored-by: Alexander Bayandin --- ...e-limits.md => 001-cluster-size-limits.md} | 0 docs/rfcs/README.md | 43 +++++-------------- docs/rfcs/YYYY-MM-DD-copy-me.md | 30 +++++++++++++ 3 files changed, 40 insertions(+), 33 deletions(-) rename docs/rfcs/{cluster-size-limits.md => 001-cluster-size-limits.md} (100%) create mode 100644 docs/rfcs/YYYY-MM-DD-copy-me.md diff --git a/docs/rfcs/cluster-size-limits.md b/docs/rfcs/001-cluster-size-limits.md similarity index 100% rename from docs/rfcs/cluster-size-limits.md rename to docs/rfcs/001-cluster-size-limits.md diff --git a/docs/rfcs/README.md b/docs/rfcs/README.md index f7b0b3a587..094f8d5360 100644 --- a/docs/rfcs/README.md +++ b/docs/rfcs/README.md @@ -1,3 +1,7 @@ +# Neon RFCs + +## Overview + This directory contains Request for Comments documents, or RFCs, for features or concepts that have been proposed. Alternative names: technical design doc, ERD, one-pager @@ -59,37 +63,10 @@ RFC lifecycle: ### RFC template +Use template with `YYYY-MM-DD-copy-me.md` as a starting point. Timestamp prefix helps to avoid awkward 'id' collisions. + +```sh +cp docs/rfcs/YYYY-MM-DD-copy-me.md docs/rfcs/$(date +"%Y-%m-%d")-.md +``` + Note, a lot of the sections are marked as ‘if relevant’. They are included into the template as a reminder and to help inspiration. - -``` -# Name -Created on .. -Implemented on .. - -## Summary - -## Motivation - -## Non Goals (if relevant) - -## Impacted components (e.g. 
pageserver, safekeeper, console, etc) - -## Proposed implementation - -### Reliability, failure modes and corner cases (if relevant) - -### Interaction/Sequence diagram (if relevant) - -### Scalability (if relevant) - -### Security implications (if relevant) - -### Unresolved questions (if relevant) - -## Alternative implementation (if relevant) - -## Pros/cons of proposed approaches (if relevant) - -## Definition of Done (if relevant) - -``` diff --git a/docs/rfcs/YYYY-MM-DD-copy-me.md b/docs/rfcs/YYYY-MM-DD-copy-me.md new file mode 100644 index 0000000000..8487861e6b --- /dev/null +++ b/docs/rfcs/YYYY-MM-DD-copy-me.md @@ -0,0 +1,30 @@ +# Name + +Created on YYYY-MM-DD +Implemented on _TBD_ + +## Summary + +## Motivation + +## Non Goals (if relevant) + +## Impacted components (e.g. pageserver, safekeeper, console, etc) + +## Proposed implementation + +### Reliability, failure modes and corner cases (if relevant) + +### Interaction/Sequence diagram (if relevant) + +### Scalability (if relevant) + +### Security implications (if relevant) + +### Unresolved questions (if relevant) + +## Alternative implementation (if relevant) + +## Pros/cons of proposed approaches (if relevant) + +## Definition of Done (if relevant) From 966abd3bd602899a94b0675be9dd8faa8a8a6edf Mon Sep 17 00:00:00 2001 From: Alexey Kondratov Date: Sun, 16 Mar 2025 19:39:54 +0100 Subject: [PATCH 203/207] fix(compute_ctl): Dollar escaping helper fixes (#11263) ## Problem In the previous PR #11045, one edge-case wasn't covered, when an ident contains only one `$`, we were picking `$$` as a 'wrapper'. Yet, when this `$` is at the beginning or at the end of the ident, then we end up with `$$$` in a row which breaks the escaping. ## Summary of changes Start from `x` tag instead of a blank string. 
Slack: https://neondb.slack.com/archives/C08HV951W2W/p1742076675079769?thread_ts=1742004205.461159&cid=C08HV951W2W --- compute_tools/src/pg_helpers.rs | 4 ++-- compute_tools/tests/pg_helpers_tests.rs | 3 ++- test_runner/regress/test_compute_catalog.py | 15 +++++++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/compute_tools/src/pg_helpers.rs b/compute_tools/src/pg_helpers.rs index 802e3e93d9..10d8f2c878 100644 --- a/compute_tools/src/pg_helpers.rs +++ b/compute_tools/src/pg_helpers.rs @@ -208,8 +208,8 @@ impl Escaping for PgIdent { /// Here we somewhat mimic the logic of Postgres' `pg_get_functiondef()`, /// fn pg_quote_dollar(&self) -> (String, String) { - let mut tag: String = "".to_string(); - let mut outer_tag = "x".to_string(); + let mut tag: String = "x".to_string(); + let mut outer_tag = "xx".to_string(); // Find the first suitable tag that is not present in the string. // Postgres' max role/DB name length is 63 bytes, so even in the diff --git a/compute_tools/tests/pg_helpers_tests.rs b/compute_tools/tests/pg_helpers_tests.rs index f2d74ff384..b72c1293ee 100644 --- a/compute_tools/tests/pg_helpers_tests.rs +++ b/compute_tools/tests/pg_helpers_tests.rs @@ -64,7 +64,8 @@ test.escaping = 'here''s a backslash \\ and a quote '' and a double-quote " hoor #[test] fn ident_pg_quote_dollar() { let test_cases = vec![ - ("name", ("$$name$$", "x")), + ("name", ("$x$name$x$", "xx")), + ("name$", ("$x$name$$x$", "xx")), ("name$$", ("$x$name$$$x$", "xx")), ("name$$$", ("$x$name$$$$x$", "xx")), ("name$$$$", ("$x$name$$$$$x$", "xx")), diff --git a/test_runner/regress/test_compute_catalog.py b/test_runner/regress/test_compute_catalog.py index 2e7da86d9d..0d3618d1b8 100644 --- a/test_runner/regress/test_compute_catalog.py +++ b/test_runner/regress/test_compute_catalog.py @@ -16,6 +16,9 @@ TEST_ROLE_NAMES = [ {"name": "role \";with ';injections $$ $x$ $ %I !/\\&#@"}, {"name": '"role in double quotes"'}, {"name": "'role in single quotes'"}, + {"name": 
"role$"}, + {"name": "role$$"}, + {"name": "role$x$"}, ] TEST_DB_NAMES = [ @@ -59,6 +62,18 @@ TEST_DB_NAMES = [ "name": "'db in single quotes'", "owner": "'role in single quotes'", }, + { + "name": "db name$", + "owner": "role$", + }, + { + "name": "db name$$", + "owner": "role$$", + }, + { + "name": "db name$x$", + "owner": "role$x$", + }, ] From 15e63afe7dde679a1b8c604fee3fec11004079a4 Mon Sep 17 00:00:00 2001 From: Konstantin Knizhnik Date: Mon, 17 Mar 2025 08:07:24 +0200 Subject: [PATCH 204/207] Support DEBUG_COMPARE_LOCAL mode for unloggedindex build (#11257) ## Problem In unlogged index build (used fir GIST/SPGIST/GIN indexes) files is created on disk and then removed at the end. It contradicts to the logic of DEBUG_COMPARE_LOCAL mode. ## Summary of changes Do not create and unlink files in unlogged build in DEBUG_COMPARE_LOCAL mode. Co-authored-by: Konstantin Knizhnik --- pgxn/neon/pagestore_smgr.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pgxn/neon/pagestore_smgr.c b/pgxn/neon/pagestore_smgr.c index 6fe95df3dd..ae92be4577 100644 --- a/pgxn/neon/pagestore_smgr.c +++ b/pgxn/neon/pagestore_smgr.c @@ -4176,8 +4176,10 @@ neon_start_unlogged_build(SMgrRelation reln) * FIXME: should we pass isRedo true to create the tablespace dir if it * doesn't exist? Is it needed? 
*/ - if (!IsParallelWorker()) +#ifndef DEBUG_COMPARE_LOCAL + if (!IsParallelWorker()) mdcreate(reln, MAIN_FORKNUM, false); +#endif } /* @@ -4252,8 +4254,10 @@ neon_end_unlogged_build(SMgrRelation reln) forget_cached_relsize(InfoFromNInfoB(rinfob), forknum); mdclose(reln, forknum); +#ifndef DEBUG_COMPARE_LOCAL /* use isRedo == true, so that we drop it immediately */ mdunlink(rinfob, forknum, true); +#endif } } From 136cae76c23d4aca05c2033ec88547d53c723033 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Mon, 17 Mar 2025 07:25:12 +0000 Subject: [PATCH 205/207] fix(ci): correct regex to detect release-compute RC PRs (#11269) ## Problem The regex in `_meta.yml` workflow doesn't detect RC PRs for compute releases: https://neondb.slack.com/archives/C059ZC138NR/p1742164884669389 ## Summary of changes - Fix regex --------- Co-authored-by: Peter Bendel --- .github/workflows/_meta.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_meta.yml b/.github/workflows/_meta.yml index 9e49c1ebc8..bb2f9fa5d9 100644 --- a/.github/workflows/_meta.yml +++ b/.github/workflows/_meta.yml @@ -125,5 +125,5 @@ jobs: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} CURRENT_SHA: ${{ github.event.pull_request.head.sha || github.sha }} run: | - RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy)|(compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Falied to find Build and Test run from RC PR!" | halt_error(1))') + RELEASE_PR_RUN_ID=$(gh api "/repos/${GITHUB_REPOSITORY}/actions/runs?head_sha=$CURRENT_SHA" | jq '[.workflow_runs[] | select(.name == "Build and Test") | select(.head_branch | test("^rc/release(-(proxy|compute))?/[0-9]{4}-[0-9]{2}-[0-9]{2}$"; "s"))] | first | .id // ("Failed to find Build and Test run from RC PR!" 
| halt_error(1))') echo "release-pr-run-id=$RELEASE_PR_RUN_ID" | tee -a $GITHUB_OUTPUT From fdf04d4d81f38b3e4901b118af92974de9709f6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JC=20Gr=C3=BCnhage?= Date: Mon, 17 Mar 2025 10:26:45 +0100 Subject: [PATCH 206/207] fix(ci): use correct branch ref for checking whether this is a release merge queue (#11270) ## Problem https://github.com/neondatabase/neon/actions/runs/13894288475/job/38871819190 shows the "Add fast-fordward label to PR to trigger fast-forward merge" job being skipped. This is due to not using the right variable for checking which branch the merge queue is merging into. ## Summary of changes Use the `branch` output of the `meta` task for checking the target branch of a merge group. --- .github/workflows/pre-merge-checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pre-merge-checks.yml b/.github/workflows/pre-merge-checks.yml index 9e5c3df52c..3bd81f6538 100644 --- a/.github/workflows/pre-merge-checks.yml +++ b/.github/workflows/pre-merge-checks.yml @@ -148,7 +148,7 @@ jobs: ${{ always() && github.event_name == 'merge_group' - && contains(fromJson('["release", "release-proxy", "release-compute"]'), github.base_ref) + && contains(fromJson('["release", "release-proxy", "release-compute"]'), needs.meta.outputs.branch) }} env: GH_TOKEN: ${{ secrets.CI_ACCESS_TOKEN }} From db30e1669c40b8ef519a6618313c5ab2da2567ac Mon Sep 17 00:00:00 2001 From: Roman Zaynetdinov Date: Mon, 17 Mar 2025 15:53:23 +0200 Subject: [PATCH 207/207] Add /configure_telemetry API endpoint (#11117) Work on https://github.com/neondatabase/cloud/issues/23721 and https://github.com/neondatabase/cloud/issues/23714 Depends on https://github.com/neondatabase/neon/pull/11111 - Add `/configure_telemetry` API endpoint - Support second rsyslog configuration for Postgres logs export - Enable logs export when compute feature is enabled and configure Postgres to send logs to syslog I have used `/configure_telemetry` 
name because in the future I see it also being used for configuring a `pg_tracing` extension to export traces. Let me know if you'd rather have these APIs separate. In this case we can rename it to `/configure_rsyslog`. --- compute/vm-image-spec-bookworm.yaml | 10 ++ compute/vm-image-spec-bullseye.yaml | 10 ++ compute_tools/src/compute.rs | 18 ++- compute_tools/src/config.rs | 8 +- .../compute_audit_rsyslog_template.conf | 2 +- ...pute_rsyslog_postgres_export_template.conf | 10 ++ compute_tools/src/http/openapi_spec.yaml | 30 ++++ compute_tools/src/http/routes/configure.rs | 27 +++- compute_tools/src/http/server.rs | 1 + compute_tools/src/rsyslog.rs | 140 +++++++++++++++++- libs/compute_api/src/requests.rs | 6 + libs/compute_api/src/spec.rs | 3 + 12 files changed, 259 insertions(+), 6 deletions(-) create mode 100644 compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf diff --git a/compute/vm-image-spec-bookworm.yaml b/compute/vm-image-spec-bookworm.yaml index 0cf72b6f74..f63aa88da2 100644 --- a/compute/vm-image-spec-bookworm.yaml +++ b/compute/vm-image-spec-bookworm.yaml @@ -39,6 +39,13 @@ commands: user: nobody sysvInitAction: respawn shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' + # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also. + # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to + # use a different path for the socket. The symlink actually points to our custom path. + - name: rsyslogd-socket-symlink + user: root + sysvInitAction: sysinit + shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log" - name: rsyslogd user: postgres sysvInitAction: respawn @@ -77,6 +84,9 @@ files: # compute_ctl will rewrite this file with the actual configuration, if needed. - filename: compute_rsyslog.conf content: | + # Syslock.Name specifies a non-default pipe location that is writeable for the postgres user. 
+ module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging + *.* /dev/null $IncludeConfig /etc/rsyslog.d/*.conf build: | diff --git a/compute/vm-image-spec-bullseye.yaml b/compute/vm-image-spec-bullseye.yaml index 9deaf3ea55..8b3c681228 100644 --- a/compute/vm-image-spec-bullseye.yaml +++ b/compute/vm-image-spec-bullseye.yaml @@ -39,6 +39,13 @@ commands: user: nobody sysvInitAction: respawn shell: '/bin/sql_exporter -config.file=/etc/sql_exporter_autoscaling.yml -web.listen-address=:9499' + # Rsyslog by default creates a unix socket under /dev/log . That's where Postgres sends logs also. + # We run syslog with postgres user so it can't create /dev/log. Instead we configure rsyslog to + # use a different path for the socket. The symlink actually points to our custom path. + - name: rsyslogd-socket-symlink + user: root + sysvInitAction: sysinit + shell: "ln -s /var/db/postgres/rsyslogpipe /dev/log" - name: rsyslogd user: postgres sysvInitAction: respawn @@ -77,6 +84,9 @@ files: # compute_ctl will rewrite this file with the actual configuration, if needed. - filename: compute_rsyslog.conf content: | + # Syslock.Name specifies a non-default pipe location that is writeable for the postgres user. 
+ module(load="imuxsock" SysSock.Name="/var/db/postgres/rsyslogpipe") # provides support for local system logging + *.* /dev/null $IncludeConfig /etc/rsyslog.d/*.conf build: | diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 58b99dde53..d31472b0c1 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -37,7 +37,10 @@ use crate::logger::startup_context_from_env; use crate::lsn_lease::launch_lsn_lease_bg_task_for_static; use crate::monitor::launch_monitor; use crate::pg_helpers::*; -use crate::rsyslog::{configure_audit_rsyslog, launch_pgaudit_gc}; +use crate::rsyslog::{ + PostgresLogsRsyslogConfig, configure_audit_rsyslog, configure_postgres_logs_export, + launch_pgaudit_gc, +}; use crate::spec::*; use crate::swap::resize_swap; use crate::sync_sk::{check_if_synced, ping_safekeeper}; @@ -617,7 +620,7 @@ impl ComputeNode { }); } - // Configure and start rsyslog if necessary + // Configure and start rsyslog for HIPAA if necessary if let ComputeAudit::Hipaa = pspec.spec.audit_log_level { let remote_endpoint = std::env::var("AUDIT_LOGGING_ENDPOINT").unwrap_or("".to_string()); if remote_endpoint.is_empty() { @@ -632,6 +635,17 @@ impl ComputeNode { launch_pgaudit_gc(log_directory_path); } + // Configure and start rsyslog for Postgres logs export + if self.has_feature(ComputeFeature::PostgresLogsExport) { + if let Some(ref project_id) = pspec.spec.cluster.cluster_id { + let host = PostgresLogsRsyslogConfig::default_host(project_id); + let conf = PostgresLogsRsyslogConfig::new(Some(&host)); + configure_postgres_logs_export(conf)?; + } else { + warn!("not configuring rsyslog for Postgres logs export: project ID is missing") + } + } + // Launch remaining service threads let _monitor_handle = launch_monitor(self); let _configurator_handle = launch_configurator(self); diff --git a/compute_tools/src/config.rs b/compute_tools/src/config.rs index e4acc5471c..290632e4cd 100644 --- a/compute_tools/src/config.rs +++ 
b/compute_tools/src/config.rs @@ -7,7 +7,7 @@ use std::io::prelude::*; use std::path::Path; use compute_api::responses::TlsConfig; -use compute_api::spec::{ComputeAudit, ComputeMode, ComputeSpec, GenericOption}; +use compute_api::spec::{ComputeAudit, ComputeFeature, ComputeMode, ComputeSpec, GenericOption}; use crate::pg_helpers::{ GenericOptionExt, GenericOptionsSearch, PgOptionsSerialize, escape_conf_value, @@ -216,6 +216,12 @@ pub fn write_postgres_conf( writeln!(file, "neon.disable_logical_replication_subscribers=false")?; } + // We need Postgres to send logs to rsyslog so that we can forward them + // further to customers' log aggregation systems. + if spec.features.contains(&ComputeFeature::PostgresLogsExport) { + writeln!(file, "log_destination='stderr,syslog'")?; + } + // This is essential to keep this line at the end of the file, // because it is intended to override any settings above. writeln!(file, "include_if_exists = 'compute_ctl_temp_override.conf'")?; diff --git a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf index 1937cdc292..9ca7e36738 100644 --- a/compute_tools/src/config_template/compute_audit_rsyslog_template.conf +++ b/compute_tools/src/config_template/compute_audit_rsyslog_template.conf @@ -8,4 +8,4 @@ input(type="imfile" File="{log_directory}/*.log" Tag="{tag}" Severity="info" Fac global(workDirectory="/var/log/rsyslog") # Forward logs to remote syslog server -*.* @@{remote_endpoint} \ No newline at end of file +*.* @@{remote_endpoint} diff --git a/compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf b/compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf new file mode 100644 index 0000000000..2580b61fea --- /dev/null +++ b/compute_tools/src/config_template/compute_rsyslog_postgres_export_template.conf @@ -0,0 +1,10 @@ +# Program name comes from postgres' syslog_facility configuration: 
https://www.postgresql.org/docs/current/runtime-config-logging.html#GUC-SYSLOG-IDENT +# Default value is 'postgres'. +if $programname == 'postgres' then {{ + # Forward Postgres logs to telemetry otel collector + action(type="omfwd" target="{logs_export_target}" port="{logs_export_port}" protocol="tcp" + template="RSYSLOG_SyslogProtocol23Format" + action.resumeRetryCount="3" + queue.type="linkedList" queue.size="1000") + stop +}} diff --git a/compute_tools/src/http/openapi_spec.yaml b/compute_tools/src/http/openapi_spec.yaml index bbdb7d0917..7c8f72440f 100644 --- a/compute_tools/src/http/openapi_spec.yaml +++ b/compute_tools/src/http/openapi_spec.yaml @@ -306,6 +306,36 @@ paths: schema: $ref: "#/components/schemas/GenericError" + /configure_telemetry: + post: + tags: + - Configure + summary: Configure rsyslog + description: | + This API endpoint configures rsyslog to forward Postgres logs + to a specified otel collector. + operationId: configureTelemetry + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + logs_export_host: + type: string + description: | + Hostname and the port of the otel collector. Leave empty to disable logs forwarding. 
+ Example: config-shy-breeze-123-collector-monitoring.neon-telemetry.svc.cluster.local:54526 + responses: + 204: + description: "Telemetry configured successfully" + 500: + content: + application/json: + schema: + $ref: "#/components/schemas/GenericError" + components: securitySchemes: JWT: diff --git a/compute_tools/src/http/routes/configure.rs b/compute_tools/src/http/routes/configure.rs index 3c5a6a6d41..5c9dd22c3d 100644 --- a/compute_tools/src/http/routes/configure.rs +++ b/compute_tools/src/http/routes/configure.rs @@ -1,9 +1,11 @@ use std::sync::Arc; +use axum::body::Body; use axum::extract::State; use axum::response::Response; -use compute_api::requests::ConfigurationRequest; +use compute_api::requests::{ConfigurationRequest, ConfigureTelemetryRequest}; use compute_api::responses::{ComputeStatus, ComputeStatusResponse}; +use compute_api::spec::ComputeFeature; use http::StatusCode; use tokio::task; use tracing::info; @@ -11,6 +13,7 @@ use tracing::info; use crate::compute::{ComputeNode, ParsedSpec}; use crate::http::JsonResponse; use crate::http::extract::Json; +use crate::rsyslog::{PostgresLogsRsyslogConfig, configure_postgres_logs_export}; // Accept spec in JSON format and request compute configuration. 
If anything // goes wrong after we set the compute status to `ConfigurationPending` and @@ -92,3 +95,25 @@ pub(in crate::http) async fn configure( JsonResponse::success(StatusCode::OK, body) } + +pub(in crate::http) async fn configure_telemetry( + State(compute): State>, + request: Json, +) -> Response { + if !compute.has_feature(ComputeFeature::PostgresLogsExport) { + return JsonResponse::error( + StatusCode::PRECONDITION_FAILED, + "Postgres logs export feature is not enabled".to_string(), + ); + } + + let conf = PostgresLogsRsyslogConfig::new(request.logs_export_host.as_deref()); + if let Err(err) = configure_postgres_logs_export(conf) { + return JsonResponse::error(StatusCode::INTERNAL_SERVER_ERROR, err.to_string()); + } + + Response::builder() + .status(StatusCode::NO_CONTENT) + .body(Body::from("")) + .unwrap() +} diff --git a/compute_tools/src/http/server.rs b/compute_tools/src/http/server.rs index 10f767e97c..179369e3ef 100644 --- a/compute_tools/src/http/server.rs +++ b/compute_tools/src/http/server.rs @@ -87,6 +87,7 @@ impl From<&Server> for Router> { let authenticated_router = Router::>::new() .route("/check_writability", post(check_writability::is_writable)) .route("/configure", post(configure::configure)) + .route("/configure_telemetry", post(configure::configure_telemetry)) .route("/database_schema", get(database_schema::get_schema_dump)) .route("/dbs_and_roles", get(dbs_and_roles::get_catalog_objects)) .route("/insights", get(insights::get_insights)) diff --git a/compute_tools/src/rsyslog.rs b/compute_tools/src/rsyslog.rs index 7537fafaa5..80594db3f1 100644 --- a/compute_tools/src/rsyslog.rs +++ b/compute_tools/src/rsyslog.rs @@ -1,12 +1,15 @@ use std::fs; +use std::io::ErrorKind; use std::path::Path; use std::process::Command; use std::time::Duration; use std::{fs::OpenOptions, io::Write}; -use anyhow::{Context, Result}; +use anyhow::{Context, Result, anyhow}; use tracing::{error, info, instrument, warn}; +const POSTGRES_LOGS_CONF_PATH: &str = 
"/etc/rsyslog.d/postgres_logs.conf"; + fn get_rsyslog_pid() -> Option { let output = Command::new("pgrep") .arg("rsyslogd") @@ -79,6 +82,95 @@ pub fn configure_audit_rsyslog( Ok(()) } +/// Configuration for enabling Postgres logs forwarding from rsyslogd +pub struct PostgresLogsRsyslogConfig<'a> { + pub host: Option<&'a str>, +} + +impl<'a> PostgresLogsRsyslogConfig<'a> { + pub fn new(host: Option<&'a str>) -> Self { + Self { host } + } + + pub fn build(&self) -> Result { + match self.host { + Some(host) => { + if let Some((target, port)) = host.split_once(":") { + Ok(format!( + include_str!( + "config_template/compute_rsyslog_postgres_export_template.conf" + ), + logs_export_target = target, + logs_export_port = port, + )) + } else { + Err(anyhow!("Invalid host format for Postgres logs export")) + } + } + None => Ok("".to_string()), + } + } + + fn current_config() -> Result { + let config_content = match std::fs::read_to_string(POSTGRES_LOGS_CONF_PATH) { + Ok(c) => c, + Err(err) if err.kind() == ErrorKind::NotFound => String::new(), + Err(err) => return Err(err.into()), + }; + Ok(config_content) + } + + /// Returns the default host for otel collector that receives Postgres logs + pub fn default_host(project_id: &str) -> String { + format!( + "config-{}-collector.neon-telemetry.svc.cluster.local:10514", + project_id + ) + } +} + +pub fn configure_postgres_logs_export(conf: PostgresLogsRsyslogConfig) -> Result<()> { + let new_config = conf.build()?; + let current_config = PostgresLogsRsyslogConfig::current_config()?; + + if new_config == current_config { + info!("postgres logs rsyslog configuration is up-to-date"); + return Ok(()); + } + + // When new config is empty we can simply remove the configuration file. 
+ if new_config.is_empty() { + info!("removing rsyslog config file: {}", POSTGRES_LOGS_CONF_PATH); + match std::fs::remove_file(POSTGRES_LOGS_CONF_PATH) { + Ok(_) => {} + Err(err) if err.kind() == ErrorKind::NotFound => {} + Err(err) => return Err(err.into()), + } + restart_rsyslog()?; + return Ok(()); + } + + info!( + "configuring rsyslog for postgres logs export to: {:?}", + conf.host + ); + + let mut file = OpenOptions::new() + .create(true) + .write(true) + .truncate(true) + .open(POSTGRES_LOGS_CONF_PATH)?; + file.write_all(new_config.as_bytes())?; + + info!( + "rsyslog configuration file {} added successfully. Starting rsyslogd", + POSTGRES_LOGS_CONF_PATH + ); + + restart_rsyslog()?; + Ok(()) +} + #[instrument(skip_all)] async fn pgaudit_gc_main_loop(log_directory: String) -> Result<()> { info!("running pgaudit GC main loop"); @@ -136,3 +228,49 @@ pub fn launch_pgaudit_gc(log_directory: String) { } }); } + +#[cfg(test)] +mod tests { + use crate::rsyslog::PostgresLogsRsyslogConfig; + + #[test] + fn test_postgres_logs_config() { + { + // Verify empty config + let conf = PostgresLogsRsyslogConfig::new(None); + let res = conf.build(); + assert!(res.is_ok()); + let conf_str = res.unwrap(); + assert_eq!(&conf_str, ""); + } + + { + // Verify config + let conf = PostgresLogsRsyslogConfig::new(Some("collector.cvc.local:514")); + let res = conf.build(); + assert!(res.is_ok()); + let conf_str = res.unwrap(); + assert!(conf_str.contains("omfwd")); + assert!(conf_str.contains(r#"target="collector.cvc.local""#)); + assert!(conf_str.contains(r#"port="514""#)); + } + + { + // Verify invalid config + let conf = PostgresLogsRsyslogConfig::new(Some("invalid")); + let res = conf.build(); + assert!(res.is_err()); + } + + { + // Verify config with default host + let host = PostgresLogsRsyslogConfig::default_host("shy-breeze-123"); + let conf = PostgresLogsRsyslogConfig::new(Some(&host)); + let res = conf.build(); + assert!(res.is_ok()); + let conf_str = res.unwrap(); + 
assert!(conf_str.contains(r#"shy-breeze-123"#)); + assert!(conf_str.contains(r#"port="10514""#)); + } + } +} diff --git a/libs/compute_api/src/requests.rs b/libs/compute_api/src/requests.rs index 3fbdfcf83f..d88451c549 100644 --- a/libs/compute_api/src/requests.rs +++ b/libs/compute_api/src/requests.rs @@ -30,3 +30,9 @@ pub struct SetRoleGrantsRequest { pub privileges: Vec, pub role: PgIdent, } + +/// Request of the /configure_telemetry API +#[derive(Debug, Deserialize, Serialize)] +pub struct ConfigureTelemetryRequest { + pub logs_export_host: Option, +} diff --git a/libs/compute_api/src/spec.rs b/libs/compute_api/src/spec.rs index af4264f8d2..868a14edeb 100644 --- a/libs/compute_api/src/spec.rs +++ b/libs/compute_api/src/spec.rs @@ -182,6 +182,9 @@ pub enum ComputeFeature { /// Pre-install and initialize anon extension for every database in the cluster AnonExtension, + /// Allow to configure rsyslog for Postgres logs export + PostgresLogsExport, + /// This is a special feature flag that is used to represent unknown feature flags. /// Basically all unknown to enum flags are represented as this one. See unit test /// `parse_unknown_features()` for more details.

4(nA@lrKNBFe#4>syt=) z5l5^QMR$yyZze33ga#;#1gMVZefP-TG22%;@N9&|H8veHKYL$U{2_76xo~Fd(j&Cq zIcbUELSOu2;e0Xwv4NVOLE-DL6vq42HN(oZ9$fK~`fOF<6Nk_z$%daSX?D*|DxVxx zNcH4)e@%L$5b$xH^rKX;z_+o5JiEtY;$oufN%fyFOw1O=Zp}ta0o-5a4IZ8DDwr7b zU@a~xv#i+byE@&wU$$QO{dck9sU9GZ{xqDQ=GDh5SvFyh`ETRT*Q*x(PG9#}IH1R7 zWTNcRdU+_!TF1QZZbEflnV5HMh46G&S)Ifnnfk?Nrz4j$JajsCtH2|aqR7I6 zSBmA&htaEfQ-2Q>=EIlVaGCH;+ro`2%kS;7uKALs`JNBHs@fFiC)Ph&7Zv}*GSl)n z%Ya+ebMoVj+IlM?<;q|2(r-<_)m5^|ecYW0 zNUh4u59jQdc{b#WshR1>3Gj1%m&5Zio0iF~?VxR{qvr|T)h{pS|57wUr-z1|q@Lk| zzvA&L#fk;r*N?)oxcDo@9^~n-SMBmuDw2KaIm6mi2PJ;QUer_R(L-NFZ=?H63@`p> z{1`CScIMVPZF$u9>pzl*!I&k?3y1&EPm3FW%USLGNnW|IS@)}<2&HI?T$A#Ey{cbG zQ&N?-`gr(wY^j}E9gQ&s>0k}u&<3+ z(vNe+OI3^g>K>>X{%Z*9E8?Jv-nLBhEG@6g*3_qr&D2R93(EY)h{!n2y zETGgSz$i`){|ZV9~o zmo_@$F+4+-kw#&stMW-pWy3Z5E;4~Zh7&SK6!-QpJ^#U>>1Yyz)a%RS!>94A|Ke+P zZIjVAK!13dPBCE#&LL*LU$i0`Jwjf*O|Sgl4V+% zx(xr~xlgJ7$x(6XzcvAgm!-jFQXy1^k~>^yK5QynA=+uLHFZm(=@SNYc6S^u<6mQl zh^HvIq{7i0|KOXvyIg0d>FKJ-hY!ICtdG=_UR%4Rr!Ape($l`u&vEl`sZ8cdIK+{U zjgA7N=pU)S-K0NNlazc+@Ro{>T3OP;aTg$|t}C0sI);}p7SC(JFxN*>tN|WNqB6{o zw9iORp5znf=HtrObmHcXl$cLX=I={!q2|)gWM{C~OrYn==LT?@u}4bS`$=wt95hEL zx$^Oy@Im;!oV$*i1gj*gmdZ^YGO3n3Zl>5Ok7%93xBT{1klt2DVzEr{pD1-PXr z`FLVt$QUd*=D1X1lDqr9Lds;ATfztQ;}`P*^x%KJ-WjUBY$ZIJX$!f@KXYZq#&lBv z>NvP>aJ;Dy9pYpvHVN%lfywc&m58`F+0rDa?IU?57$SjVN$C{v92{H`NCu7&O)_4t zD@S~3&a+b(ml+B-nt50^1?O{u#2Jr_x^40TDkj-on2TbTD?i3@@C}M8g#@QsdHZK= zp^wX&&GSr=iDTh{g~g{ud$724ONPbi&EWN76s`t*zB}0(urq77vNmt$hg+EPe3l>T zQnlS;6lmqfJW|*Z%;C*CD$J-5~6pEJ&o`MtO#bl&?D*HV^o7QB;d<7cP9 z;q|c7b)8=5J8F?leEC}fV!1$9*~ z!PF}}sAG!W$z7LBQ*v7;Z*R1STbpWAwWDu)X)aRagnJ3B$|U>h_!GBoJLT?$fX?{j zwd2l1yCQJ9jpv%00|V2x4WairHZkRBd8*ijv z^m%Z~46_&>wv)uYwPj^>L@ZkH?xAq&*re?e41r6lUDMvO6o$jY%emT7LxIa8l1X*q z9fea86hE7=;z}a~!{b=?khEpWrn)I;)6YyA|B7n6(|2qiE-B+RKV-3d=U*gIfq(T$htZAm|_NeCc z@o^y-+1QPwhPjO7Fs|>=vu=J>95g^iwk0BSr0y}Eqt8H?c9n%S8#^hNkt{|-QWhMh z{R%xZ3m3zHfvF(MEf13vQ)b3KfR%T2;sNUA5nv=8L;tO&3}=iYz$8kJ 
z7$~7W5XQY5&Z(&%?^VLEGd4z?mI)C#i@r{v}r~Z<(dE#rg-s;)*1u>2I|Yu9Ht=Ly7hDcOY6R;~f?hSK7N|*!A zC_!;2xCU@|2iP@=WJEtHA!MiG26SixfYw|{06PhlDmpI3dO+Jw|3~z(P{!s}A~#Zc zv3I*zU||#20G*0S#1Ps`H0jRgp+l0_n2M9=WU5uwyKt;9FJSiWGz}80+aD&xIT)rz zW_dkQSV^E1?_a=@sumGXQ!ry(S z&_XBGFCzol?fl7qSrUPR(9aqRw}20r-Lzol z-fI_giO=N?-O_TyCk#2<+>cYGlMU&m_Vf95Kr5l`9)xrn9;OxEO(B;N2gZ^!N{R!B z7|hMph5@+^eF_g%gcZYiXw)= z^!mBW3Gf|B=_ExtsIemM%jx=2F?&W)J6O5R$oYC@lOc)=D!PkwE{ulRCfpSveZIUU zde#U`nXgZXFWpIGBP+3K=b+T3hhL8jfpyW>bgB?|7uk;OAZrURGO$pi(wB)x%mm~q z_mPmvXtsZdIguR*)^4nph~u=zT0!9aLeLkn*^fz~Vni2H#KHVWOmI75Ztw5!;P^0| zZ)hM)M(J{i65{P+yhdLdPZ`M?p6lb2+2#rdCo_tuw}Q`Y7IJaFVZ@kr_Df0)bdcVX z-QMtMSZJ8FiMkyma{Kc)Knn3*8PPJ94qJ5*B~F z3(j1mFN|Yd%S7=VCc*n(YP5>DuUHf@)9fm6H2_*YrVNncQ5gus7srITXpE6k9-U(F zyyJe@;q&8b0~9Vq2%dSLW>6r>F6!`L;Io8iJZtMgL-p)gP9{MG+j#gW#K8MICxj9C z5KML?-oLJ;iRFA39|$96PzQ=hQ``7cl@VM(MA2ThV`1@G31+VBXfLf@@5oRvS(7*A zi(ZIi{H*BedTG#HN2uGZGiX@;GuP=XzdD zwWTSZ4h|a7s`r5MsLnUePmicU0|DQIYB#sp-fSn*a|vH~nBl|3T%RUs-p+F(IJB{A z<9IN{m+e?=ODLf`948hJyMaf@#l!|0PzFfe+dYcHMFP^B5{L{vjMXA4W#Jvzx|4{y z)*;3M6UBAgNCiUEuVXnt!d8TS&m=%vdyJCvEUdq zQOTPGLRSR;Ao4o*ro`q=2}l<6hY<4z{2CeEwW#XC06c62{=)Jwtp2GXD8K>nGzruG zER6}fMi1b^w9})gl)$ZBlqz-u>%^N_GjvjJuV(aW0R3Y-uE7{uasQGyML^;*a`EbG zeL#819=_wS!Q?-13h4fq|Ex66{$CXzv_R zMEaP>L#ncz-fhOf{)Foie7bu$I|aXxp53W2F_noud*smiFrkTlYVUT@I507u%G}xk zNibw_<7C5uyAwdBj8Dh-AI0E@-B?9P88w3av3d)hxpgRzk`GI6=aT}2LJ{nm;^!c9 zP1blQf=ap9yQ(yfUmZ?&!CX%D-O=27XBk9crb4&m39ifJ(`!JRO&#b{(p`v_nLpa< z+h#Lq&7dCGUS)Y^|I^L~@yD)0lPKvr-c?gAYAE32V4@UtBdDeZAxqS5#~VmBbm>Px;PdIAu(6mSEd2hc9bh?hzufr8l1 z8i+ai-h_ZQgP>ZDoUROjRYkGK;sXWiU%~;Ig+iv*_oyg!zvquQV+ToSFGRqZ@9?V8 zgh}woet_VeMMcR16h0J2!8tA;o2UpKN6wdhKo9qm@ay#oVqMX|#KhgFK3~*m_$zQ| zKo$54YPKmr0w#BF5|1g$fw9_9K`gyotV=nnMGy-NQj2=O9TEl{GmIZ5A%+UFoupYs z(qmPt*<&;Tf>;rA^*0WHup2G_%$tZEk%NS=U$A{dsXu-{AI0N>zK3MvsG2VlAk=K8 zH*YsK2g7npw>y(qN@`J~CO%7eP!oM!b5qP4G;nO3z)bhCsmmo_j~W?*?HnSBL*(Ky z$O!X>im*0W7(S$`c>}0$`tl%=3d{laFrQO3S+S0Q;i@S5vX`_7yv%p#nOe6W>f1OV0bj8dF`QW 
zb&K^Ek@zh^#FsM72|+Bw32=POW|+>@zjoU)0mjcOV0yz9LuI0rpLi@B13$8(T{4R2 zuXr{jL+xDRj#aVSiZ1$qmV#68f3l%n64Wc1y)50{kOeyNkkBq^A%Xe{ib+9bRaGocIg&1sB<-V#6~Kowh}8u2I==#mI189gB`5WJ;_OM@kvL_@bE<+NE{ zWV6H&D1r`PrjPxM`4DJg z7lF9=HU~Bk4LaOWh$n@ws!BrvzW1AUK;M`0aD56W;tf^smM4(xW^=lUT2kG|W;+%A zX}NSa_7fWCyiN)3V-x~hcEah!jWzGw#tOcY+cLozDw|;}25G5C$d`EP6BQ?P$4D<| z6*JMFOxZO@MwCI7R0xDw;&mbm=2;VR>vM+I7-`kkXWuxQ+_dA2Ag3W*eJLQuw#4(K zHWPhK%kv>ELS03O6C18Y{9{KnmfZ}4!u{X0Oy)wp1G*&v)u#a z`{KJ4KX8wX*u+y@ZsM>+zqa;j1hQdzlsLjLZnFbK7g@rlsh63TM5Dw=90`nyI%?Sa z4RS0Y;)ZJ;gbMT^l0sr+Xv2}1XPLUhK12V>fsx0SAoWfHtgH&yl@9H&MaIzsS_+}y zr~@Hbk?iG9Ht+C@P}-4-P!HYW-d4(IF)hTH0ytbZr?-_X3B?=P6`zJ;jf@~H(b7%o zRjs9n@dsP;mByh6BaibL=h;puQwTYJ0hgrcIo5$od}g|Sz3bO-U9VmlS0ri?dCU#x zD3{t~!^3@aM}}UD*SG@JyZPvPZCwFa=jl$2-HADDfESGZU{@0K0Xqc;%c0rxx`mzv zUJ>Qo)UAgBh1bjvc}#!kFwz9%Qa~?$oaZb|=KWY^3YzwLdcl((8(xk$<48PM!>0XX z^I@7^>lf`hXrD@O2u#v9ir=_qS5shZFRT+{oat_rZ=o+bMV|S1&tv^m!|MMqDx3Oyo%yU8J=h9U#6-vzyY$LZjdX8-k45gpG_mm1~x?V87 zkjJ_g_;F^taGwo6(QMZ(su#;zSz(-bw|2@rGwh>ORN%4p9Zf}pTe$uy;{ zWHklLF8gu*gW@60&pVm8vWMrU#Duur1ZlO!+{$_hrHDTttm*#NvkOG+5%s^WsF~LJ z`iva6_tEf62lH2ac|FvNS@nv_J+z4aHQvlga-~d7&w9W!$lt}$?}C}}Q9G~x=ue!E ztaXY_<}`Qr7y0S9lZAI(05kZfY!e(wZx|_K)JpX~kSBf|?MN0*(Gdw2Yp*mV4$Im{ zPrH#lHKrJuSq|RSJy}~2lm5`HI*@Oee`KVUSy8!vg!wD+rFlB=m5Y)Tlh>MyvqL~F z)dRyZ&DBL_lRrv9)ouTse*8YS;qDiz@!7=YqeiKnzF1|>{Ly)aLZRvUU$JK9i|O&Fu(cvUz( zP&Z6v5vgiiSI#O)o2uwpoXEt=&I%xF8j4$zp{IufFZjYIvw%|8U zdqT5^rDn`?XpNb^@nqO5_EiV6Go`efGyQoFx5^Vt|BP$dzo5$&F54d9yL%}~ zTslwKnYFbi{g{A0D^7BLbk(u2p!8r=!9gxy>7Ua}ce2nCmhd02L}$$({O|EbY$Y%b ztN9~zdCrv|Lv|eOZ7ch%ie9pEFK4y;V?KNg8GG{3hg;1!z*q6@67y1kTSa%7{gwLA z?~2NguH$kB7gZlBg)PR*zirZwJmopzn(-%7oc~+<^(oI+9R0PCenU>K#&2e5_1`dW zn%Fg-nFaf=?oJu{hY!238WLf3nk~YHCjOpI+nPLcZ;PJjz0OybJjfWo7NJpIBP$&OqT zAI&*q?QE8c9}2vt40(Y8u8P}cEPEX#k9pI=)(RNN=9lI7y3H~Bll3_}Rb{WZ95UXg znZA0fu#|3AlT(<%n7Z~yaHwa7HvVCd8So4PY!xoFa0f~hdT#13Vv~V!QPo|)M3s9KRx{Vt77`M zwI=D%JoC3lT3_BOj``~<(FicM&3eTxI86AvEPteGH~beY*EH|#Sg~W2eu)iLde+<_ 
zF1#c5TmPwV>yY1`SmlLU{bY}?=?@DPqh_d^3T&FRtH3Y9LDA;Z?3dhckv1Ixp5=@2 zKm01F`HNit{U};%J3!+c>J8KPc)pV*=GQuvaqK8spzvepVPPoCk*k4kY@!q|?&}O$ zqP8fdT1wOL(6eg;alX_wm)Tq0*J%d|^CQr{xuE?y{*eh?$&cCdJr>UYe z!XqA)o@aN=>&J4m$v@7^)IHL|%M7OuR;PVrLYuW24!Y-!#-PoOSyL-Q5Z3Yn&k-S1 zO5xP<%9T-H%#W)4kD_zhEE7IYQ~Bu+G>zmq#yQ3#``VruJWz;JG}M7T%8qXeGG~)- z&bj*L^6`cIc)Y;A8zOh`pKe+^43AE~X0yI{Da^7gYH^*xvuFmilt3vS(m`paH=jE&Iu`5wj7>)=jX z5>`lMlzIB*0o^3TH5-XDQQZ>CDMiu~jRoJ#ISgcNV4 z&QJ6b?3=LJvh)oDH7`PDcZGao-VXghkAJMg^ZLj48+HX$cSn%R~{giren9$ zfS2PIL9c5Djy4`rSCtbMSV62gFzJqstY*rd>71D4jsALBLi-VC_*LP=H|E?KGJZ1+WOTM3*{isLxf?*60iSd=smP)TBbJq*zffL_76c-rG zPatX-(jk0m%G|UFI1+pSzgZl4)e#R)ehZrjX9T9Gqwo2JaHmHJ_Bnxk#)UHtqpnYe zR%$}}(vWbEoOLi?>vBC3hlSmqIW%?;@?M%?pf0)m*$(I$! zZ^^~Y)BO4l#~~c9oZII@)S;0WlbBp#xiV4pDacd?k*ntIMw+ytrkog~`fR4WtLD%# zG-U15#!wNdt+5Z6XE8@db?ckL z9#>3=?QDF0D-yoE#F4(0vQfO&*mijoAM?`1L$5w5x*jdFxZfICM`PtK99Xs!aHv}j zABZc#gyvxihmCI2Jb<+v$?52-u><7Lwnxc9l}ZrGA$?;|y*~ZKr=i}{*2n@A42t#; z@!R|$OGs?2O|HKzv@gMErF;C5j)9iMh;&u{uzm5{sjRVc)#0NdLgNIP{wkxb75SO8 zHRsA4a)dYt!$Pz-GifmpRr@TicPY@we_@U^;S2Y=aA)QVU=uMR#MQ+*j|KbN+VbT* zoa*$>mxMOo^-|Y#N{G;KU<-l&n>zg%M$3puTJ zBl+grG>4^o*kqkhV1pCladgUzeSP)_J9;-Wc{T2j;!HK9ZSoT;Fn3iv#FL4R$X6Sp zkSy9soXlSb;;FKUFG=V4vskKS21)}4WgT(+3=xDc*AdjmYca4WrW8Ywy!A}A?k_E1 z52rJ->hwy`UErEEm5D>hDSqUG~8pkLCb$Gi=tpZ26JU*S6mvEyUnwVl1eVw9h!VJ zWANy8rq?6n(jC8FnWNP8XYZq2HtS!l?E+M4Jbhv`u^uyq%rR9y5DoGo?@<|8X|mSU zQo}?JP7ifBHKWnXtGwN4?2l#k&T$HI7!&Pqm(3HT>Ulqq#atuGWav1!ezC;UVm_$Y zVTmq^)r{l59+6co8(Ho%o9}_)m}}C30eBm`d`M7~X1PT9M4RWx$RDwOz|v>YkXC@uWr-_<2CRf<4HmDiXLF z4bKdwSme{@j4-&bhN|sJzCW(csTSQvCr)-~JPgn7XL0Ypd#YT2qnqAOHc%5RtmU9` z@`$xykLpbwV3u;o30yXHs>X8oCX}tgsu_kueoAe7-jJg>WDVRsshNskM+Y!B=F|NZ zN%`}Je-O8hOR5>6<=sXD<(qi=lAv_QT_+HlW!vNEkjglQ>dZi=l-h7EU)vJA2@?bW zm4E>14%KY7N1bl1aXV=|VgyRxy3^m5PUD_QYFNdc3+$)MZ#f7DQiR(5;h`UOTSVGP zhjR4|Fg=~CXj^67I+^qj{M-n`wfFh%HyED3=|CJbiQSsfD|aN4!`LSu8UAIE6MqP| z3rxq&YHq^vVoDaI6*4l@*AX6bWoO3A^qv;T!bHD3v~;r5pj`-mP&5cXMdOx%{d)QDa 
z%Rwz>eNY1#9ith@kr97cH&{2_W|CkpHJG_2K9$piS^&ABP@1+Ne;IG^Y0%=x(c7eUkq2FwL%Dl9j4IlT=c`qHICS$=3vifIy1=c)#)+L7% z84HU}(vg}aEnB`)bGL8*()LdJ&(9Nfi8E!~PBdFS&f1N-yK-?)7S$0*(`JTyWR#)> zet)GKII8cadOdA%Fcy);bE(Np-!-uTIVEV}Nz-;q+Y9wfn@js?lC}Bx@Yt6Y02b+A zX#8~moI(i#Mz#Gdx-Or>olD8w?A3JCafz%eLl`}_-QA>JlNNY>JGjM_(AD99p^CYy zwk^ad^5B5`=(WB>5UG;2WAoUA0Vd(bQCIIksc?pBzv_rs07W^#Scr2Plg z*ilgpegsVW*4MEx3Nq3U60TiWVZ)OL08%Ed8k# z9x75`TokkGyVNisTb9t`Ba`bKs#?!rSd!V^AazJP+gwYHZ|z&gf34&hp2e$o`)x|) z{BVTq*3n-r3#TzLwaQ=$ZSI@a!su_)8BQ0eD|7UUF5|ZSXPYa75JZl?Hv1ZvWkL>$ z`TEwUO$|iEkprfVLlxH=b0JsJY=6j*^xrO2Wdpz?p60;hqZBGq459F9t-IBRo#JLM zLOq;@-fu(tn(uf?k*~+Jk_%h*iGS5XKr_6`N4Zba7_5a`C@_xu>L&($iH>#p#>B;G zZGxE?OLeW0bie_8U{K#fH=&ihdh5^JRJ4c60Ro`fl%`m@{x5@tw_VJrGh!9c7b#Vg!2e> z3SLn+^g&!QbbY;TD4t##H9J|QdhG9Hv?5V&x`K0+Id7~O7Z-nZsduX+U7%|I zgK9=V);jx<0uXBY5fxKu+|;LYlVp|vFhj-h*B0E#=D;xCVjsUzoL(3pknY&jGo$9A z$EcUJT<{v1=2}Mh{S?MeugiAjGcpFsm?XM>3OZ#E5$R1hAo=WBm>Jq$etBn-z@s|rTO3Z z6HR-Q(29fWhBDo}EGf$$J8;ms$Us`2K<>I}sA%x{)$X>qqwfmP=D_G`tCO@6$zZt} zW0qx-E2P+(cHhV~2Yu>(|+awuu3Agc2Fsb}|FS+yl!-cc~l{=!{7J>K|a(aw8zs;g#+mR+R_95@z+w291Je2qOyjUGKZ*U*Nv6)vo z#%b~57ez(urSGvO4lMi*0HkFzE@^v!uYViBKt8t2pT*JcyTehrILIYO=a{LTH)S0@9oNl1nXgn@t#*v^MR>dvnhw^LlIWGXtR<=vo?Eim&5cyka9AI zg}VuHUyG`q*zLF9B6@;=h zD4FjKz%8&M9S2M!J{qG(jWZ*tk=#>%mE}?Kj)RMN=KbL&)`ZMeI-)^)ui*%DkqWB5 zUpEOjUvmI~i?WTRSB18Uu{Z27#C*M_rtd_~P(qztn+*TWUfjd4Nf#>$ZUr+O_V4p|foKOpuaAxNm$#!)Mjucfjk}7baGe3uOjCr>7 z*4->-E*A^T(agT%&x1-XgNDv_o%T$iSr1d*7bg>-9eJ%wwHCnxCla-8rJHdj=t7>G zZ+?DoVv~pMCE(M&4@IUi?IDjQLMut-jE#+g ztwjrxN;IsQpQf*`h;m#{ROj%dX$msCLX9dr#dlX1@A!al3qaTTH+%eH*q3lp8@J|>~|-)fRVS~0l>%)u=GM3-=%za`-5 z^Km0ZqBAYw+h~}Rj6{Wbc)h4;B-GBmVr%eZXwpU>h~fR1_L!%MW=~QQ5dPWyUxDXU zYDB2Hl6ASaVb+(&(c4#qKhtE>Pg{WzY=pOrgTiq9-LburbtnsT>ViwyD%+*)usQ45#G2K=Iy0!KPFGrtE(|HfY z*V=&I1ga9vG}^c^6)rJTWCJ9vt! 
z1*h1jac~4_l7Rl{mM~Y38hFhREXUepO|<#F-JHiwbDl7JJ`HQ?cl$dn@NrlEqPCVOXz+_P zCeXw6JqXBQgtGme0Q2>^GY(jQOV)6s{ww~3s=mE`ES6&Yb6c>FdJ3d?*j+&@48y=w zM94NrXVumlSQ$Czope>OFPr76H@fm~CQAc#2=K)&dPrY&Sd=G;-NlxBoxdNwb!2s02^D!j<`Vdd>ue=HYv=mO52kJk)K#suvd2Jex(S!#f>P}U3?ZH zU$1To4FWf*kS&*TEt1yA1Hv0Hk)~YQU4ItUTh1BX=(qUWkf~*E!7>%Nx>EE}f(s%Qq1TDz%2U zj&{2N3m>m$dcZ!oL$31MLB)6?hZ2mNHg>JGT!;^@U~awWwz82T&*Au&)oK>G4HJ#& zk>pR<{QbgP1{WWXk2IZK5bp7-7rykT7-VlAbZ-~eYV<+=+k4MZAwqu#B<3^6cT@nFh*=y#_;MscV_umf{G9H@7x17oo>*+Z?AgdFS>9}#D z%x_K9)L_ol#1H2P%K8!zWV9!&krVw3 z;IQsVz|~JemJpWLhN!928*V?qq}yFJ=^nq+8~Cc?NP!4kyQ0(@JNPpjy{#MqH6XquMXjDDZb3#Y7q@UDDuVfe9jRq-u;A!N=+8*$T?&h%Pn!Rq%Ccf{AG?kE z%QPm34N2S{ufhvW+@AKi$;@=motsE-tsb#l^~dk{+vB`^?y#QUSv|?7!%cmGesTFe~$Pm&WfCm*NW}J52fV911Mo_yYYpPDb1-g7 z%Z7NzN1H8o!=GYrLc7Em(NslTHKdabHt;+$6tMZx>S_F1IJr?z{dB(5GO872$ zVm{|ZG{#x;6fA&B9@Id|k`wLWp}u#TPD~iG3&{b;QcRCaDC9k#?wS7I^(_Zelz!hq ze?@Hm)2uxzgVS=-+sl>FG;=9o)J4C>fnLU9p}tF^jc#y};=u%=OQL>>oMQgP&c{Vo za+gHeX(m{S94Pzt^MmQzGr?AR}|rZ^|cPBFpIav;RTlU^Ka8dgID*$-no7Pcj7Tg(@9N@2ZQ!$O+b@#Zj7 zE6dLjZRITwoL%*!%r_ejrm`ob6&0AR(CbgotKtzZml)>daFJ3CJ~^y=Vh~C1>9|od zwxzWJKoHnl-@P5&7W|<793;~ea^(dJg7B$Mw0KHMkj;z_A38A0sla9Shhn!EQuF>r z8D~6;LxWMHXZq4e?Gn$LQNH%Tt+>4AL-+4GN&`B6se8(hGx)!qUh%r!D0-9=QEn$c zT_Was60ibT0F43Qn<< z0qec7pP#$q0CqFZ@WRNhLkdMJbr2K)*0dm@bNjVR0Y@f>&HaJ+C1NHssXcH zSW>FGfj~$J_;xSV**j#DD9eL(T2*#PIM50km}@*m7?@Sz;}_phAvQ9l)Mc6VYzk%_ zb2s=Zha4&T6P(sQOzoQL9fInEo71_&ddhFslsGBkO+38MQJedaP6*zVr=#Yr;oM$o z%*bq;9OVQ=3F^jB2P_@~2gAGi#sX|qn=niU83A75i4B-ZbNfT6h?H+RDte9bM-mspO8jBQG(p}R7Ne<&3u6wPOu>&q`a7o{M zJXkp2`v!+|aC3W_^rT)Cbr>>F`6k3*HM2aB%RXFGkEkbsw)Dcw_ZAJP+ zr1-gdf}3g>yC!319#}|JC?7o}WLLL1OUYk?zs%EX|Km}@fSG%f6v!G?@*rpQspV5EUQ znf_4$inz<<4^o|h#Rm#nuwYa>uxi$dGR<73R|w$Y@3=rm={Axf>iDkKz4f!)n`zH93?@FPxtqK!vbQ`RZqN==5X4hHqj&>P zm*Pw|jTgPEAV%8-wnL0zp2DTN0vt4kd*sXZEXL-rRHA39r>Q6ML<%%C22#mIC80%# z1p1|72)9G6o7(9af8;84Xp;|qU+w?z$-Ao;Ta*asB2{hhPI-)f|iOb`QRj3mZ zs7jD+|I3;i>jbA5^J(4s4=6W)=7Qn45%o=g3Im$f?2@8Z{%}gp&vp}&Og`DNdfTj2 
zLVhkhBBa&h7i%wxX+7*Z>piNbe;n+p%J8$EUzK43ma`2zYUa%p=@MQcTKz?nj zeqh`74G%M%T!Df(#;QXCjZ1Fj^>EP|%@YF!!H7K-T3Y-jnwd`;Q9C9;0}bY3%i(1L zXNd<8_ci3zK=~+{@9v`T&>yO@;GA*0-L6B0E-_}PW=Jubxp9V+LHRkQmaYf#?{Vc} z{L>mLpIyxPEb5&DaQZxCnd`_}cDyW>=yhFrYl^=EG7 zbl+@PsAQG{IOvjv`&9T%{f3KM$L)#;eEIKB+5~6bGr}3ab4jcEw&K0!Y#QQ(H#2OT zZN}*RO*&Wkj9>tf=a*z{GAWS0OKa9iC5s1a3KVZ3pTL%4*`hk7GXo+?CPzKf@LUk; z#KsE34FTRAQnxf^4hbnSAV>otZ7j4a1O6=$>oB!tq+cUZ2+&UpbtQ@GB<9RfAB{nt zOM`&D#*B#}6}VV1EJibKm-JK{RXa`~)iaZM+LE5Lo|{@bNNVV+M1gr^AniSt z#zM}~ytui8Sy=HmGU(oOcM=oH&eA4no@|rW${pB%vsT{fZQs3}uBi^dBQp;m8fqB$ zOFE}_EVNqVmQ$FenL9V)v!5WH9`CzFlim~;@9ouYKoGi}nWkL3gDTS-XtV-`ZzCj* z9)y8P4Src8axO%>NHv#|&j@^6COHwV7m5v^>dl>NQ*Cy!S4j@9`FaMmb4)+S`4*?{ zdCLyIK>=HobirC&X02ENUhToer{RRB!gbD{M}!CDL~<2AtO6!IE=MQ*F!HV%If)@R zHz+hvn-L`(QFa>yALM~~*3khxQQ7>^vW*-K)pz+VO zK?34i!}ViLmF5z^nj{@n`QwO@!ECwflCUnJwPr}hp8j%V;iqPANv6Kbz(c5ALmq%g z^s#WOfM;%w4x`>jo=0!f&3$jk${37!d_ST70QEHta|fxpJ6V5Btt7^K3d~`(K(a9R zsJY(I1R$|=w)7xGeZO%cyh6+7jIP}?YU?@uXNoD z(+^gFc|QNO@@9$}qp{GasCTQVho?{(Gj}W`t5(-K2(AiWpQo|d`*!R`#GeUeNf{T0S2 zMmMM@ar%2W{P~T3$>WSyQoxp>ysNsz+d@yt321*cN~!DxwG%K`2M}6}>21un1Ti#V zsS^$EFw(EC#v(VxbGZ9_L=m}=8DCy_3qwG8nk`-8d@9UlF2XoCZ)-JB+iFgWqN{CW zT!Xz5?NT z4#>9TBCc6RAcoGeV#jsu?WGIt`kn7qdpf9GO$5LGOjkcdycRpY_pE_Wm_# zIjuGLR`W~GSNQ{T@x!j8bR3_Pm2)=Im7jOD+}y+3hT6Y{UZQoLMr03CWdz7vGp(8^ zpgYL4OM3Z?$GEdKvwe`1xJ$=xx{jZr%g@Nq(bUqVp>-c@86WWPLEoe^dcWaQS49H- zvK`kUGWRI_29hK@=2_J~+LU$Bl;xg2m)4PZ0s|xUf5it_#}uoNy6xrgu6OUZbn^Je zh{FzTsr?nUa5>8>zhB%&(R=3%6D2sX2J*w$=?qh^s7hjZNaez?4TWo9qM=(9e~f9t zb-qh8ql04f9(6MY0xE@?#W+i~`&FT#@m%eiT$GrPV5C2Ib>^4vCOUOaY9U{iQZwMg zu097j5ylU2V^yU}x8=yDVMOw6Y^0~TH|@w~gcz+ixzO9&`l_^-?J8^ob6}-T&!OrJ z&K@{!ekhkKQ(?>)leOs_6AA|UniTSy1M)WlzzF@cP0+LdOJk+{D8(7b8DNeaH)=pV zOerzgO|pCD<^i*Z8)p6Rc)zEXf)*5Dhd8GL{v(X|=C)v17GJ#zueCOa3-l7|zfuUz zOeo0H6d3%7F=-~?s5>MGH%H=3_uB=CWcqy1^mX`DX9QlNH@KQr$dIplzHa#*)cB>8 z{+-lRJuPyMzN=six6GCWb!YP-Uz;pw~=yQFP0mcBvoidPmVx^5q#@O#^vZp@w1);Qy 
zlee1!MWB)*k)WmzaH@vM=9FC`o#}y(iU)DpGOSahPa?qiKc7hV!O;d9jL!O82)Dij zMi~fal^-ZgpKGpSd0za}`>R`4E6cmJQthPv?!X}C7eFajT5xf4fC5@fzSS@1sqOdQ zxCWAw?aM$9+?^^j6goC=5CwSEI33@AD+D?PfO}%i{Nuu89*#_|zEvE3m(LPPACq06 zwT*SK6`-%HH5l-X-ZRN+xiQU#t!1xROqKR6)sG zbY6S!jM`KPp*B%VV}IRJvwFW|J$*7yMBE!!cN<8;Lutfk?BgM5&~-l>w?Kq%J=OBB%~&Tr zSkv-jjFo-J_8$BCOJ@%wsoKGbhzSJ+2w)t#sqoN2pfyKgv4(Yyei55kWy_}wme$VX z9w&rdp4pokdp#ma4S-Hw*aTA5Ehok96d>=qA}MI1=53YeB^T2#tFXm5*9#bcWQdm z#oCT*gn#1G$2vm>4e59kA?gRf?QG%RW4i z+&ai9f!eWkkWx;?d%*t1#+tqm9MjCMG4}|NYR-{ai!ypFK_@pM{&GwpcpVxfyM6Ma zNv0jvLQh)93~al^A_|KUBEbIT_OsF~B1$ zvkhm)N12pNkG{oQRGRqCwUw$E`PNbVK1CtvJzZU1vL{MwdJAzDe;!iY!wdUt;{##Q znr4gkA$a*Q4!D%fix({SfWP~noA>EBgn4&%g87}ij}*qvN^@g}E$X`C1^yN~{9rrY z`ty(F)cwllhubKxs9bwwz1qaD4(nq%#t1H@aJ-)0U8p0G%AScxbAbeh@Ck}=NJxG9o(u`a;Q$PR9 zaPs4SY1Jb@rET&`VVwN6K1C?A_IR@NOBb?iuaJ;$#3fIO^DD9m_nG2~vc{wDk8!v? zkR!Iw7PNh~@w%89MO?A4$Z2ygCO3?vsN}wkBa86P425urkIvk4J?~+)@c+KteWpEw z#uBEBJXO@ux}Wy4VZ2#ta}Ykmx4f(T&LZ78pz1laTf%K=KWg%Yf{ym-yd)^<3xazrJ%Hw{>H^GOMzgEzKBA>k_XPx11>gf>h>nzc~ z&>K}K(0BmRx%m;DJNc|F_|P>U(SG^NF|QE(c4S{DMtmy|M|VgomE>#k7(R$yh?K@+ zufNNgJYoLnz3}*?@0Z-yEb{bJc{eM+ppK{Z_we58zxyRsZjnSYL#CAYOFx~;UgzP0 zemH+)PXSeUVdg^u#pm13|KScz$6DjQK9{KF3)Q24njJs3-``oNtTZ}T3DT>9Hw;La zJ+#}ZFs``?$u^GBc^6+Mw14+;6tUwfJ;HS}NF9BV&ZEJ4s1#@I$x z$ee85|Jq<{f@8=B9PY|ES5OZFlHTA6XWlez;P?gv79LA)XfQtr;d zc)fD@9KTl|n)0R^H3uHE{j$?^Sz#;SxSsl*CP%$yOdMOH!^py}h}#cgK6^i~N)^=Z zlz-2ZhwXvSQK1CMI`SFs@|P$78T=hc6khEpVT>!$pNsbK0+bMj_rW{Q}S zPVkqjmN#lY1&#$@X}S@lqb3EMUdO%lMt9(MlAWS`>~vP<_qzY1to)eWahtpw`d&YB zk;YeO!r9&dDi~?3`}9Axb7kNH3^c|MhvyxF2;>;NWZka4mtP=L6H_-_ph`IAy3iKei>@6z2b$A1w`k;zr9jmy0JcSYULdvc#o z%=Y^{NRwOYGvY!_4m`2!9nQ^Os5FvBtp8Tut7nAb3 zf%!37c{8{(Kk^YZmMos`E>C2-U*wOg&5b7ge&gNWKP2Y*$ba3zuVh^I^!!R43OQ=y z?vEV14UUWM}9LvE(@@1x+zdD?}mFo`YD{)Mu} zyEuqR>X-Ni>Ev#nz^@jsW5hBt2wKST{!@B;R=~8m|FBN;VDj-}3bJ?4)ebhs1AJZY z<{VZS3;XSBjE}^uyY(6l1bE5iUlTI9=l#Y!qEbE*KJ$9wQY_z8ed8Q>ML_((TR~XF zS`}w(Iaty!PD|lG?OlJV`R>Pr7QIlHSM7H}Ge&~Asa>lxl9B}7;%<}vj3~SzM83AO 
zu-J#4kSg&U!=yS3MZ1qu;jHq`JK%7dw;b~y;^C5B>E9(yyvE9IuRU$hKatMx+Jff= ztKr|Sc^I_>C0-!Pe(M=b^tZmsSf+FwQ~F1$6!tEQk_rz*r`yj~}bcR_>s=kA*2k`gPF%}nZ7 z_uVTGH_KiqgKt|uXAu=Ky>8>?>eZ{A5lNCdiWsyD^O-(TR^{9$Y{+T8Be@4620gSA zQU2X;I&&0-x~y0se01g(8{(x*qvf>@{sF!7^L3c(cq^|GMEyyOg7JaD4R6Key@%`L z&ReI7n02!Z6_aee@X296QOl?fnK*r=bdrax4)=AZ5|@fMzsJ@;-7Y(TMN$x-&foZ2 z8h#9hHE2_(N5+4SCUur#-1HI&VRW0KxrEXEJQRGPeW7jviCH}AS1;F2vhd8Uc?4X6 zhJx;=8hePvcm4N8W~vUQaw+$2s7N|4v)(2CK~CFRWA1%EOlONAn0eWJ`IRTz?dVCo zV3HfHC4Sz$s;;yL%-7Fe^mG3qNa#h+>R|L}US*3cJ}*pU4_tiq@MhidJ7(S~nyEpr z#`^EhJ=?ei3h{;Hi`??Ejhh^wPkt}hanKDh^ z#8cM=I8LmZU#sm|sAPXWoEH4aU2Ua;la;zhmiBe&HR@`<*fraR%ejwR+53^Cm=hQmvLzE=5zpTQO+5H&7hHVSug9lV@{ZsAbs=6l z)Fp+tIBM$Aqo;Cj)V>VLbIf!2ZWa=4Gu=~EPoZJPx9vYQ`x%m)_aJJD@+(8dX4oyo zKI+F~#h>~ICe`@U!H`rYN%*G0!|~P(+U%Ff^M4=}qrJYEqX*YcI?V4lx(1j(+-F=C z;*cu3xp+~*MC`3a=<%qUThxYhM;5OoaP-g9XJt=g{ATbDZNJ#rZHG^G%;EnlF!Y+w z`|WR2&BaFGX2fFLMs_?&50oE$5|UJs|M;)KEiHS2$%Q7Lt=L;er}S{vimc~p5Oi_K zTr8S=3smrm>>*jywqmGk(U7#=&V36CwVDe(hD0+H!PS+-(bRmNsAWH@61B(|%Q5=# zcD$;l4}LGcxQ-(Y$b4jG?V*#o`MNd2a;EFXI}GVelWAx5*WI3 zC1-m{ZqBb5;rOS0ve|i!Q3>zAun)e*qmeT1339JfM5T^SeT#pfuc zq)P=Ae$B&VM;)b1@zyW9N;YO9=F!qW{x+x8+md;_3G~vs5S`0CsUP;rPeCUhK6s2r zPIF<@b;_Dt0jK{MFW~gk{a0I7{>NcD_sdYC%yI#n`}JY&j6QM&$j8MLDg#OlAp@7- zmOl2W*}G?+4_g;!abQBd5b*4g>aUK~dCm`09eyk?B1b=yhX1#%`u3H_e0fP35lK_( zo3|;eZ<nRhzm#Ev%I`0;nQ-{WZ_{y0> zM}$()-SM+)I=r&ZL7)5Th@gjDUIVGFQg&xzw?jfrdaQ^W%j&^R^I}6{8)Lqbk|uRx zyU%!9Cc57XzuC#F#DfouA}8H0Dbj5$KUsTCqXtu=dBXPgSH7FtRxntgh5my z6rp=K{<16Q1$Cuv*_z`rKDp>Xi5e_L;Zibjry8${lBNd%Vs>Y?N!#A+I2oH$M`oPr zGN3Sik*$;&YP|lhemLd7{aj00K&dX$UB>*OjzH1vOj!T9TD5~>vslEJ6dqbo4!&yM zp{(v?fzLl-KYBd=A9*wVV?q+If{?9He@2$?f6Xo)aHRJBY^U=gPtQZI8-EWThpfF? 
z8>!l<@v82Pa{l3s8_Re}8ZG}yaYIlk&s4u$Xohq?TPR0iZs+P-Hj6HvfP=PsL_SB* zrR?Q(MChA~R&zx9{^!px4(k}MDA;^#+qj-DHf+2;Sp24R_#r;#_QfQ)SZ%<}yTV%S zxlTE644*gcJUohM0lHs!!RjCyc49(=dXY(k9*j_!Cr)Oa5aI0CdVQuqqx0y%?llRB zet=x!+oJzndrz7Q{NB>pQa}s;9ojgh|-%N34Mv%EbfDu`>elTP3_oc-nmScBy}A0WHU3@ zCEG^Hepk+&G=(en4$yv3{>JB_C&sMqGSF*EQ@PpZB({k59MN4Cd(|W<$8;i>OFx{+ zOY>eg1Vy&}m=qmUc0uO-N^$#SUi1-+@!yHpiZ07@Du+uO$#%NNh3g&t-*j6PvA(+2 zUgWBNEYD}jzaEKBI+V}-o*AVc^HrF{vx|>G<;RfX}G*Q+jWmlSt4T z>TXEpp(&49$var!mHSP!Y14Yj}X@`od z|AX5pb-*+k@?;)9XV9_=b=ICHJ#+8Om8`K9xk{H@QWqiK)F z1)QBJYe(UlsCia|Rtq>p^mDv?FBWXKU2 zN5jZUp%&<4G4t(PVc5V!BJWRKoI5*WO>YUU9=-Xqr8HW8c16>m_5H{Fc->U!P6 zU{vTia|sd0LvfrvjVUB=n0Zev;pAJVYLTE{Wv*whPF`PmJ*}(YTpaRn{p{Wcy{9aH zde6NU4(`2vDf|9t2zt1@DgW&qL0!zEvnrxj69xV9#Hcg)Wa?OC{*t86EQRWomtTmZ zA%kSOVT=ebxO=Cu*3oSuKggAWY4bT3VwC`Y`oTY&pE<+wUw!PEckP(Dex`p}@eBW9 z>x_Ncyuo)!C{)5|mdr7wpCN7Xs_l8V(&{~TKUQ;h=!G8AmyPe!K1?fR%Z%UPSIB5- zPTlk1oSo894W(BL?J<*q|5X_5ygoH?Qjxhzx~~KJd35{WiJ5JLeAG+p@K~;fX0#r49i#L=#B2392-{|?Na*wcm`ts{r$;zt>D#!>U$*;Z#Kfw z7H_->$?*r%#8y9F5cTV_u(P~W(y{s6HxgzIk78;U$%oFZx_qfhFng4K{wqWL9qo%i zNm;XnxxOE}NmhkDieZ$M@5?sxCeoiuGg^+E7Ak-ZMgPlBRPKVS#n9uKJdF=FR?GG} zrmO09BBNm){a~(Y6JE{LF0&>3>g#Zs|I^j~5O>CSpI@4(x(VHTL@p>&o#bjjHC=jF zCim`Ps-m1Z#F)U)>x<`ZWc7pNHaf0%(%B?<9kShh+~6AJM%a5+$oC!|d`$&0^EEr6 z_hgRUYVZHefbVoZckZO$JbdESu3lM7Mx8cP>K<>=PwS`|eWPV?z54yTU@)nE$n$`; z#Y8H0_mPA+(DXf~Ko-$&q&Fq+eidp<`d4%bOvvR^8G6005T0K__WCfl9;$-mtS4FD zmvP)tun;#q<^S6skZvxNA@PB{mKXHda_Tgwzj}gv|2>24bHx*p zRG&%j9as!^ADaEFlBp1^EIfSTzdzXah)l-txQ`Do)B0E_Fd2UBAUIq8)l~b$e%S${ zGGh2iDUqG92mfKz%-*T7wo3Pi54^NHoz2_x4W1Ok#-H;AM91U0>*Ug3<2A}wg?Uts zWK}=j?!QdWo8PK@Sb$w=U*7WBR3H6w zI=JBX;GS9LFZ%mI<7I@_&i|IjZt!nkpAvf6mvDY0|EiJyoz`0xk)MJwnlMR<0%=aj zdOXUeAg=gG)6!mrJ=Avyj}^rq4?X@GLL#X0nYYf7m?)CKvkl_1JJKX0 zrMc^5?NbX=Ioz3xEDqYLdrbQrq5z`_qOeorz_)E#yI?#6f*~Z$7pMrl|8IyB~y&HLdDEGGN__11(6Q0?=>BfI{_HY%U zrF+TWH+8!gy~hQQL3eJR1&=OLS>ezh5z41@3!LzuZdwJ1CXG-hW$oX2^wAMSh@!O_ zjRyiVPda%_cu4;TJ3z$0OUuSf?;2JtKvNZ+(=%inIO&Dp)Kwd6(!+Obv$jF_*q5fu 
zWMUT$nXi496%+6(IriYQUfpgUhV-J(e_* zPu3qzX;5P5)!o2G@*u>ptp>~C#{;oVmfGPh5)*8G4V~gUCahNE!IyE%?)*2O#6W7a z8iu}s`Urk+^i+w63E@*7urzmQhz3yuH$#Ktk8&>lFJSI{>GB0=2H>1yG!zWD%r934 zHRs?(euaAG3#b5zA8nnWJls#?O)y*lIxs9*?0j7<5|mXlodA9RJdS_T9wzt}YS%4@ zTX}n~7k-l$8D1#!n-xIUk)C-Gf*>15X9>gZ(S#@F!u(Ds0?7IP{n26KlkacNuEluJG@JBrcb<2P?f!7XNr`U~YAf%F#m+Xd9#3BnS_gSEvak@tBsIX1HZW zIhv(SuVXHa=Z>&hQ5cB}u;1--8F3cGNy%~hF2P}U%4M4+RxXhH<_ve&L$)m7GWz3I zod;f5J?PysYeLssyNYF2XH3{ zh%5Rf`0@cTYi8<2ROoglXg?TUi}v6jAINh1$o105`&-ttpH!8(#gO?3G6yBGP!$S!GaqH^|JA8xvEJ> z5V2MP#ez=4i73iG=W&7_b9cGn45&<2Ex`hfLSF=dH%5R5GM9olF39h^*7(&eh2Y=c znKG`2pXhTIF*`xj6|uOTP!xnQKb~}AxmGDw++!z^)dBUAV1U^qA;$~vg zPEoQLgI=h)`Xu2yaK7Ia2WGufGU}pIn<@TK$Z=}zZGc~6;hQMY2(1hVTFd)hz==eQ zmNILI`;Z3{IPkn3;L)^b@}!4CC5$Ia%-KaWu>Kw- z_GDPG82G>cW#8f1WjpsTpjXlH0xroL%W6Rd7mMnU?+tAGrob!xX6SH|kVYVhVf-G{ zP7I9Bw22)JQI&~XoQOlN?pl-Z#d{2ort`+~Q)^jCxS0!SyZ-=;2;pT+_N?+wH)ldE z9S7M{t6m{OAysflVC~c`)|SNlG269Ic5Q|Q1KeN_6j-e)u>(@EKcM@My2d$*M2b(S zUv{hquMWOS4?dr&1$>vF+4o$HOT1dLSvuedo|4Km#n|#wrZFp2FTsW+ABGL1+DKUG z-FyKlBmHVzvQ&w*0H1@Ny>!4gPtfL3m+Yo(=8RW@Fky!?!|Xe+HO$2g$D~HYYb$rn zQPv-=^bx|PCB!*NahoUZv`l(67zqJ)76Aon-dy8{OnYXFI64o`iu5z_E_9R2gguZj z#gCdgt67Kco1!QM2<&jZ(Ev4R%^5*{a0 z5lXV}X(x>HJ<;Ud+NJ&~$JG7^r#dfK! z>Du{erQvfck*!0`(F6=&&3A&}UAIq|N3GT&BI{Ii}nM+FZ5;T94(k&4KFwOAsH z@yDR3lDq<;i5!!GVGte~9LGkuB?VNwqOZ5`+z4H? 
zfmM9GQQ|29Vixt>>U!kHt3hgv$iD%z@9G00N`=TE^euiaV8gYu(9YCag?8k%*kdwA z1T4o}TB`j>=d(XqVS_+}$jFyp*&C$@5=4enfvr(7oLYyW^sPG!u$NCukOTr!OGI5k zq;&a$G1!?c4KU90;D(L1BGoRPErmG5aMK$e?RxBwJO7}2v;h)!2GU=D8yFnQ=J@nx zZ-q*hz|VcD9j+4PM@-vP;>EtWWxOyd>J?z&T^SI^W!z#u7&X}wzEzbYZHcw)NpV6?Lc9h_d0u> zarW&HI$QM9uC3%8gki@B^iKIX`wdUPFAgY}qIr`+nP0ZGjuunO$1Acv6(vqud8_y< zO$uca3#SGOKOi?5TL7dZu|kQ;@m0J5)LVvilh|4grN?c{9VuI0_wr&(A%~|)Zl?kZ zFBVHG`SFH1eNrUPMaQJGIUW#hO2T@Nl%}IhR)YwU=)vISHfB`Jpz(Tw+$nC$1Ksel<_|P#_wBPHr*a81+xUS; ztzliI0a?MbX2B{Bw*^mW8Q|g-Ml86)X`BS%RjQl0#VHRagS01W%GMz-*~zF&eRDyw z)=>#K!#T+sRDj!%As{2wNIfy~-Otk#RptlX1{W)pkg>Y#yNg5*UvC42H>MhIQ1!b%Yf~3KlgyVd7*_e(Kck6 zuR^cw$9gdC?{jT`N6*3 zM}LCXxeeBx;j>&Aj`U|zeok7yx7XnAFi{5_zS;ViCD02~IV{(mCZ{`HqAk5Q%CtJI zT;rHoJG*6_+Aa8%+t8#1nQ%dghyv5}qH+KGt1JptV>BgT62UwMpID z+88p|gYYF_G=K*CRMvx|FS}zqsB-9p!ZMA6R@Sje!NWAUXz6zmz1dOdIFVhmTfovr9`Few$1bM6!NRu@MISJa&A8gE<*!|IZ{Jt_S z?6B8|nE_C3712xZ3BMr<(%08%zUM0RM9>tYQ~)~-I(NC$2?_GWc|z^3uk(#b2y57-Y+r96aF z11gtlIv5B&vhd*50l{eDMD5Vz=+3?oyBjf`jjjd(d{7>Z8U9g-pJwo968&&+&s#1O z4wZqDJ$TkIQljtowB^k+ymM%2OIi8bxMa`A0pN&Yht-IXLfc6xnXw!nVgQSx0KX@g z<*a_1c_#~2SF3kpI*RVQ8oZDxBmf244VZ{4Q^7XL4+gN60MbuVxp8=qR^g3B;vSI- zYeA8d+`d#8(hsp+h%mQND7GsYFGwLx479-+jV5#@q%rqAX{g~OlSTXr&1@FY2@(Nt za)8&u>Tab8?f|JAP;hz5L*FugG58$EE!Ba%A$Nf0F(*R^GG#Ty0SF3V(CiT`W?|vR zv?TTF^TLEXgOY0Gj9L=MMTi_h&{tnQ8|EuhS2KWk`CdOPm#A(m;{5RH8Y~8ddhmkm z^g5w<@=Gk%HV1j`)cdI>YUAAd>3v<)rEvne*}y7Y`5OWC-t(v|hE^!0WQ0C*ain0 zN;L67>7+?M9uoqZX6OVBsa%%vWcCP8pzlfawW?qo<=Ujb4^8e&Z_(O(f&gds;27$( z0j9>ld}=m1cyR497Yg@RVcZ?*++W=>kcvu%)|}KpJCmCDOK1F#+Rmu+KWa25tzJ4( z=T`|xh=Q;^G^dclIeqLT7GcSnDDEcHtqiiA=*AW1Cy&1$Gm5Y9HNdiK(+mTul*Rye zR(3d=tJ-_&yPgKk>Rp65qzUoFP~1j%$28)Pi;lBe<1HmZyl)hBjSUdp#_}#nm5^3E z|AUf=zJ_)(;dfSEXtz<;rH3o5z~BuiQLiw0xXB{)zY7xLu{^da!=D<|z*`Z4anN9+_p0~|m# z_Me((Fvr8pE^jn5A>Ue-)Df6)U@nuY&rs3xLQ*PFn0=&y`@Xb*$r+-$VC%u6+ zOPETZu&5!Ut3??LmE-{52Xa+CFjic?Ij+7Z#QuX;0_)6V94BP2Q+G)j^7R2@jgYb& z^>QGglXb)wp>JAG5;z$;IMF`C*h5loXZYN&SIR_~-gXG>KRjvxWk4a?a=loA0Z*7j$XfJ96`7 
zB61~j$$bJSayEicG0>WaFX~)m$F=Ex`HF}0!a`kIw2D~zOn*DuqGCO$C1%VYV7?Fk zR}JpJ8?;39O6b~~0N$X{P~H^eR`Fa`X$DIrk1mVvBUOtU4kZ)5(2zH$2KTbN4Qxt7 zN6ZKhL1;|>xRpZXVOU8LbvIV}2~YGK`bP3Axk~#Ni+$1gy2>rSPx^{imr|fFcjgY6 zH?BlK7NlWsUDPK*Kk%nqjm4gSMN5?@8mat7Lz9%L-Mp3`z;?=hH#FpSykFSuJ4$an zaJHXNUT7lYLCTo*ND*M_>nSq9p^xFa$Qx@wmfzULXiYXNT!Js&si&G72*DT+a6?@b zL}Z#0jKdQ3`$1Gxc#!L&2LmI#Zt6zheBDI@bE_5Dqsu+RE|9Q${b6Ng)02IR3jE9% zbXU(gNl)kXU1HHpYXKOli@eZqv5UHn3RfuQOGqF^i_epQe;;QeH?Rhxm*MTd)~ zfF7AIA8)l0q~+pbnXbM|5;z^4-%~iUkUrq9RO^5=>TOBE+lSP1H#(Ev7O^HRJ~0}q z8mx6V3=gon1N;ctQ24oR0wACSc)_-Rx(8kV8J+(hI2u%`e{L(%A9tjUp$`}rdfwBERXq2v+<=(%!QznAM z&7;3Ce_(v3`UT;F~2qfLUP0txM!+jB8KyF-EPs{`OQ6tBdb7#dK!Cs=qQ%RBu~0IYyBPjWT#jZ0X0+Vb z1rarAVxZEBQ`j*a3WJQN>}4Y7kB%9lNmh~dX#jCyNE!&b<6WCtOA+;~k)za}Bfi{C zyGG>gcsxEV0533O>yL%Mz%DtJ@(ZPC@x=_|0&g1@G#Tht!{V5GsmCFgQxgf^WRwI6 zWS&qe>k;mLJIpaOwAXhXyg=!08_mJ05txZSbhX>l4DGh!bXmgJ&3Ej|udyS+gCZKK zL1{F$t{|u+%iQ7bI)ajqU7k_*Bp<`(s>OHGh7XXBeF@46xd;!C{eV@jAfBn~NP;Kg z%mWVzspibn_->r~DwK%ArC=5r4EG|qRD6Wqh1eoog$~yBmQ)sZ z@Xb_dpIAkmZ)fJ+OR+!uy=}X<@W_FhMMz6_jPpkFendN25a)L@$&})RB*+6QDj>lY zGpY|o9z6(Vi}kh-rD}FsMP8ZZKw#Yn9CI&sX%k-0&c&)6m)zLSRBuy%6*MM={3Xjf z_-IB)y&ohEoD!<*K|XLRz8cHAn!A{Df8a`~ZVz*od%2`(Sq3i05B)xg`dkGDYoTzLda4 zAv8^HXm~z;2DD2|EZ_!;j!MA_ESyo!SNrg1l5LFhsvZ<58X;obp{u-YbV}W`59K>IbfR&KttxK@w#H`PCEK*Xo1cqh zT#m0?)@~6ag_a<3N0*v4~kM z)K}3Y+4!XOcz;=NRQQkSsLm;f^euZottkIJ20=y(wL>sg&E0~Ge3i`ca~Gd0mBWQC zdqxy#O=141`###>>hPd%Mvl)>E6y6e|!GO=I@P6t`FSbu`0zc}d9 z-~LV+UyDHZAC#nQZwU{%Y)=LG3sa{7u17)VI4#H}&8N|mBZ{wW=U3%ONh_m-vGaqr z6Q5l-lx%1Wku|D^0m3YL65R(Gb(C6&Ikcj4EKRs4S{+FL7_30vXeh{7_fLdo%-#cU z6LMTHr~wT{J^BxVN!hUlOB%2NZ>tsyHZzrxPWQLfAAl7xP|hSD(dq|88exM#L3p?K zaFz32oQ{Pz%~RQz&Dj&3%yiWt9+-J;&3v>@eH%Zpm?3D#E#6v-af8zIA>t=$4c_@R zj0gIC>eT)qJUF8XSoiz+yT&;#?6GIGkUQwC2iLn3KvyUqRBW-#FZ)Vg%ohyUIr3mz zB3K=FT{C9=9lBpTBmAaI@e*>BZv_ZWv&7!lF^4fqkcfb*M>*m-AT~_a}-g}|F$wJ}n2T6NEYYEaBi#(st3W$2SdtAT&u+yE)2t#{jYQoGk(`@Uot-5wN z11FFAu(mRG?wT2pHUu?|A}A#-1iI8O{cv~}guev9yW9a*0EU``Zg$19JtH;60aPM! 
zk}IbF@Wo_%e}+Hg|9##G7nH+S71N+hrJd(1ng?Yg8#jOJ5>euLk$%XpbPFfhN7ANxuk3YyNGY;Kd@bp4DLI3 zl&QyndzsvtdgRMq@*j1nTWhV*OWH}yszwYSUpV{wh2SEi=+_&cvlsLaprKZW-0S=ysl9hT}9l zloQ#=#uOr?QL+<+f&^M^AR;)(f0aDT^6-9relhvk>D$pVSe2%K1j)3uFGfX(=2|pmSS1S;arvv$&u=r;vK&s!I<0Z+ADOf=8 zFIiOZeYy2fA;nV2q}#(n`n=k8JapO_$Ov+Ua$Adxi>Ffs`WWgDbCP!d!!>XSJ(w3& zls~`W<)yr_$>T-*kVZ(tEtpu`IIx)SJ7%GoT8UXOYV$~rZA!nOx8brulM@JN{m|N6 zpE@=PpyfOl!;MSjL_zM_a~#CFcA%Q`ZS~keAwV}m&$5v=JnJJ*pp&(2EsE-_!>nMs zc>?|09}f%^IsqaDb8!8A7YZ{u%|wo%twPlVaC?Fa?=E;-sSD_H3m!y&p>Lp|X~{jS zl7k5Jrl_AGI>l=rj=LXh;BpNBFbnf%&5~2f>zNJ)64w_1AAOc{R4yUNaNfJY1UdlpNj`Sa>@aG@! zyShCc9rOKgWj+a71pXY@Mt-#$>eF=T<8s+w&YyO2^f-q(aSaqnSgh%FxLg{rA#Ly{ z75THDs3c|-zr>DG5WA~J5inJGI2a7{iD7}|p48~nnncCZ!>m)5qmU**u{x*oh% z3`9}O9*+VFF^qDc7vIAJ-u0)~a|3?)*wylg?cGK5I^q>UFE}XB2ZO!Hr3%4NyQpSy zxHBTWh=LjV#slc?hgfZIV#SZa{{E$1aFu1iVFYSM5~w?n{E5p@Vsfl~do5>@wfJj* zQG^d2-90@EF~QGqk_IK4K2iBm8}ondAOR48enjFisT(kR6rXlQunlNlae3oM3{`SD z<6%CJ;o)xc9>RrT4_oJ0W(q>GdjPI;kl)RBiA((Q?DAM_fBM4!m%P+yb)iRI02EC# zJ@G1LErHa>lhJa6F<(Ea@%Vgi$;Xg8f8;;4@k*M$eIV>gyh1d zPxW1^E_ye7TzxuMA3TzmCq=)q@GPCDw=c`?1{wodj#(qwKvV0nu1H9w<&?-*hVcpx zDCPG7U@J8MXD&pe0c_jOJa{n3t27t`bp0=CLtkjBW&tizUbt~NJ;7jPo^epcf4lc{ z=1k6qYZlh;LiCJ*%89GS4Ah{H$e)-H8JPxO`JnhHC?5!>XDga$<7nqaW|(NDmwkpA zdLJcyxnTYzP^qVvS`ezwNQho7cXWf|i(ECZDle}~lG*H=_gP&gUV+z|D!uE&>pV+a zt6XP`FcMJsjSbprbCx;OOrL<8_Uy>)er&hZ!(#38Jgg&?W`2VwoRf4Xa7~G(X4?;V+Vz z9|`Bj;)yCoU`%wuinfD@^)I4BhFh<%rmOrAc?VmRzG3Zf=^+j-OBU}&8l#s1ANrpl zbMyNnhbv|{Gq@7!blwslPQ($_KUXV&Xc>phkPuM0^AQ!GGCH^gWC<3hkhb?cnjZB| zRAF5sXO0j2tI$(p@723@5e{d>Hw$9A#73ZG?tw3sazw?PWQ8TnSath~uy{f=m70ko ztuha|2O&4KJtv^ZA2Js56Z$muq#G~gs}0v5_>J^NPk=-(vGr}uydNf-dcmTOW87pD zuS?4NtSgx1CKE2a57KVVn4hk*eH~0p5Z7|TPzSQI7*E_G!`|=fppHf6J2Al^3emo| zIuV(0!}P>c554hX=k&pSHsEv>4`usFY~Lh4-<19fl26bK5;;%!clIO`I?fwBsPrEUoL-wA@mga3Z>8KsRBC=fX$8APko zULAcgd**ET4;x%gKmP;d4Ph?liMy41oK54V`U){?(phG+b--h zDb=Zbg}YmgmlaQv+p?ktM3rVNSbwR7U^RK+k^pQ+(_`oWPByyw%>uR!WdiN71{9acA+=#J#-B)%@ 
zD7vC0D4LMw17r+vFD#7gypKmWGOZ3vg}idKK#E5l8@vhRqr>K~iX8`P>&~M*EU95G zq&L&~K3F^qVM&OOB$gvH6JPiYLFQHF>uYS%rdI~bM(xxee!Xfkbf%a=9+BN{YDhjy zknTeSFyik+_fwtPA-&Tn30q*Yt9Mrhzofyz+ZnjMS!4(mI(Q#2$<*|M^zKK@QJBOrPr+Nuk)!9~Y71L>roPstV+qKd^)#k?;Do-vGgkewrxUMn1cpvufy790R;;=rNLSl(;W!Dt`T4q*30f=)X&lvso|$VgSW?7*XEDJQY{4LQN<(Pg zg|ND5GSIX|L3$q^p=KQFgm@TP{Bh$N!i;I;;(x@4fll?#UJEHem)dPiy!qDfsMN$y z3QEHSG5s_m0&hr|=1p^=qBIh&L|;nbw3%)fLng0mj~%eo6`iaAHJ1fLlW7JiT}=>2 z&KIo}=mS|_=UW#T2*~m$5xpp7FPWikr|}=&N8fD!V!cIGoEZXPYRJq{S6SfAJdsy& zkTn2j{2~#oXIRNK%+=B?kNtwiS%;+Drr)O&e-6AppKRpQIpC_Uw0N?!&r={b6q+qOIxA11l>>~i%U_(| znXO~J&8>+?iCgwal;lz_{Wmhq_C|8FQHQ~X&?q?%7A@^U(2ZTkgOrM+e#nMFkiV?o zeX2u*~Gj@iag&ozkSU9QTPj;vAn|?7GnZyr(Mzs+Ve-yZ>#%f_%@X(0eiR}8-d-g#0cCf&Kjn%C zNh8x47F7t*d>GYsI&6GToT~s-R;m%{ zbrY-n#~dz3Q2}4_kVrmD35%?bz$Fa*U#>N`AFN;oMbIW(+YbdRadzU(#RA^8;u0CP z>{|pU`7TsYvg;*FLiKMB<8P}^@|7=~H%j8F*}T%(N5g+mi$V4x=P#2<@JCB_<{97k z1y3~CzkcTWDPI@>uhdp5=^CH~PTue6nA(Yy24L)yowUm9U`MiRdMMTLdZu$XH`c;z z3?o+?(GWG^H@kbq#U%YCxEO6IEuuk}L9b^hRsa|_dckv4;Zy?y^cSuoSsqI{Y7qrJ zWu+@+t!C2T{R8M|-s1g*m;^dZ;M)_`GT59B>)*hD+9NcHgqxAOI`_Z00%#7^BV6l zY3EOwH9`)JK|w?lmyweI+kMx^RK>%rF?Zs)OK3UpKi?skKm8y=uNDVBd?6zv17J27 z52A2JzNJA(8tZKeu)cd4K>h$JbO@FfC$R-FWa3L~T*~bMeoIVQ2mN6@>ux=n8-u-L zdAu~7*qx*bzxSThIwm&H$Pvoely5cvUDT;6Vam_}#|MNoP96%=-jl+F#GnzzpbBC8e8aGRpYTIDg93&5q$p zs4s_p6dcP@{aMFjDGm2`bJ^;f3?$yANBKh26b2P!`UK(tX?43)qKAo{)p-t%_7hGk zfrQdtxa6!N&D9zJ@xcls6U+3Uk1DZB6`0x zdQ31qH%sq{@9$vj9uf@rXOwUd%?+0bI`k49984ZB8QKxd_(50R>red5u~Dz(x5@vO z2W)rZU|c)QD{m5Dq6Xf@BMOK4>qbjbzhuD`G8* zu_$kopIxSaE;3AfcsLQ1Z*Uk@{?JwEs6!v+Ys%PsqJUje^}g%p!K$$%#heD9>t>?L z0tH8v#IO`tGI)9Yxovx(pAEg;_YR8}4wgv78XbpWxMLfr?|#8wNKKNb|5G*!3yI7A zgS?4hc9+_Tfqkz81Uc6>7w3*y@2n^PSC&AkT6iWl;n^x&& zNr>|@E(nb<2Fhe~O&<)cBR2;LaP=vC6K$>`jo|3FZf>{M6y@%FB=lPppuz(d)6Nd9 zQN=g{zsHS)PeB8SkrFZO)uT&W7twJbbzvEtRcKlQp;nXw(Tha)ydS_DPown6%J~X+ zLuaWU_54r%0D=k<3I1y9Y-$k7Kwp5?)g=rBR97EyGr)FU2X%iH6X^W5pl$(yQO(XB zWK(cUh2#?PlKB!V{OF4WqdY^zyOs4ut=nLLH2ouE*?e 
z7WsADfuB=Alo7OPA#sIH^}m$8fAfFC7pB1&^5r)0W=f+l}k^j8Vr<|bhYJ%rc+IJ=+r>)>Y$eV|3@x8LWT=k9QK=1O)t|;FRC-KttC|W*|t{1~^B5 z8PqzL?UYccq_F*XS=DSU)6Qs;XRxs}nA?CuJxTq{MBex4Yc|x+Q8nNs+(%vu2?!m~ zBty z`O!H8^-^6%UwBEjM%pqN(D!qiNT6RE5YXs7wL}C{mhn4MU<5}R*|2DrfqG5&sE|e7 znM6iOZdU;;!;qLG7|7nPwB$v{cOhO-Rk7WBE|zvn!p1l+7JwRQo0 zhjy`Dc!w@q>uqtH-UJbRAphw7Qx)19W=MhHGl`a6$#qAwFQDjr%#bCSHb)AK>4I+D zW7>qPz-_>+fN%@Ck1V=cdhXGH>Gr3nW;rzvjr$0Z<0~p7lz^l* z=i}L+T{!As%-dYlsZ+~e$b4+(+$WpF;ZscnjnKl(steTczKPmz`G0ZC+Z~fX%CxOM znlJ9rWWGxohVGtY!{`if$LY27A@=S(gn1eGkY2mvh3CDWvY;N40}J&gmXhh)AF20O zHuL2>azBm>V0%&*q|j*^T3J5{=AS2xGjf%dxK}s{qLv|5S8MyQ8Gq9bCpjsr+Fgddv+kI83H2e6kp(Lr5_eW84c6`8%zF3mW}NeqM&P#YoQk+Y9t z&5&wkR1;*EbmYT%h6KfBg$hbOfPTVY3AhLVI?3ey$_E~$5NmL7FK>5`katv@{(;b5 z)T&akcy;LVH^<2Dnw)47B&auaMpS1>fm~*sQ+qUt435ZEA0n3EhFy^RhC$hupG>mw_sFwLqV7kk7zzYtnNVmU&!OVLyu0WY=#+iZXmEmYoXcWh;i3+7) zRLjbiltMZ6nn3BzManZ^B#DB(pJc@7(dr)xc0-DqhIK{V*8hqLt4lOG2~SCtWuQM(ppBJVZb=7@H6aGq?i zA_q5wbC9>j_%n4#dD;N`Zebq0F$~)@iVWZa&9gR_V{*lfxYeJr_U^saQl@v`m#vOC zh;!TD&1$S#XVxEunOip#PqU91qI#8xWA)s|{K|=h0hRSbCG|obJO@m@&NnT?JU@Pqh^lNywyhw*-Qdh z^Zfp_*fbwuU3>H+GW+MDX1K_Ed3A2+*w~atE+fbCutniSZcZ4| z=@Y36zWyn2cd!{+jPZa2XjGJh47j*wuLoHh1xEBH@SfOE@`=#LLhl-^HUq16aUaCn z03}`=t{Z~6=&KkNdL6?86UWhk)OE-P4S~S<*`m)}y!Bb#z1gWLx5oH=13r@sP$&5-@FkGu-)YE{ z8Nz}&=QQR_L$<33EY#PU*1k#m+)I}Q@dgs^Jmfd%2Z>7y20I)EIu5u$_hzoR#|j3OXF zm!FqGAoYIY#>%+q-rRI14Ovuv0|iCI2ZecGh&)Q#hKZveH@NA7s|z`-&XPzYP!@;O z0522PVu5FfXmv(81IyFoegkPk4G~u?4+&m7Nv(f|qa^qm2n=U3Os>d(KkS=kMVUe4kgT3Nb+%pq#LE?_A-2|Ndg%G@ETW8pa~XK-t|SmW5U69|%e#%!~E5na1XGN~pH2iX zk}vx%q)Iq_Sn85)hZ`xL;Gkhv4+PBsx9E^NF)UhbRobx>^aHr{ifcdY^xw$Rp?cIP zwjd|Mz+K3zM#|LU4B%8Dh*BE>P)HmVE%{fcW_(~C-ThF#!!$&j=1w_#t*!a6w2uY^d2r8++I?Qqq|*!}ARi#?e~8YIKab%$@&rTu zmbd;a|DXtC*o25qZwZe6lt7v{p{Z_ItkKmZ@lvb!cQPS%lu*wcT1!MD<4he6NKA_C z0@g`+fd3wu%xIsZ+m?>b>w?}7f3(Jk0xJBop7YD@a z>z%x2zMJs%s%83&$D~^~gn-OT*4*}&2z8)^2BDWq{p(@ce9>DKNAUa=&gvDe;i`In zv35&T!r3OJJN)o;lFL(ZW!OC{C&Jpr+#MxvlE7FfjR7*;h)sl@P#h_6MM;Zy^)9u$ 
zROgy1(S+ISCno46sB+LJIUrvsMYX8$NtL7PxVGljy+3Dn->63Fy;D8tl1ybv?!H3Z z`Qpz2xHd`#Kn?BRt2a(p<5_~KhcKFDT}z|~alHH!>Zr(cMY`WLxiS`Z3DPYt(-LjL zpy^??O@}Y_egrRrtLNg@Ka7ZElaHnyB!1bKUs`Iv6W`DfOO9W_$&#hPLvx`2H$XuE-h`0bTmm8uyG=1n0b z*^W_~MSSykIaK-nAEa@IeWv&Zy!yo_oN1@dx;2u7}6;%Wc<%bE>;n6&CjkwsBI`XlJ!gYjDnQu$VF4Y**Pxar=dK{|>>I&f+g?4Kol};zl*^9CIJnP**;nYcIRtKJL!MqJ zu$p44y6kv1NI5DLN9T9$u_5oG37DY?sv+o!RaRJP)`TUdqoNCgov#m zJweP9NP@~V5=xH~^d-;QYeV3EE!7=Gg#j{fX=Gd3MIZXBa z7V5Ru_Md&J0Y>mz`hPr(Zh#CZdV-exI;`@#O8Xca1a*`E*-gJd@0-rB*2c++M4olR91`{LA^k?`jc-jDai8BbHd&Q4OC6 zI|Mi1w6#KcWs-BxhvSf8mo^M7qqFAAhW$AlNiE8!ZGt)b521qQ_Q%_sNO4$=r?^I- z`vO4`^iPcwqg&T%3(7VjXMrVFxSOp_c@8Wu<}UqMjecOzy_HI>-mEgdP|FHJT8-rx z0qX9#wiVLn*+!D|H7=p*<=ny@;q6c=<1xStP6&k`cJBVo5Ab`jf$x9gxYNoxoN(~7 zVsPQfa}2dyc*wIy>{faWDs=d9g#8fNO~_wkx6!zQA9`yK5lfdDs)ARCMAd0_wZ)zC zn@{2i`hHzX9dwH*`M*a~YfsKqW5GL)lPK8!nVZV#nL{tFOi0aOMdx^jlwbaQ)!J`N zOwoYhtOtc+u~hfjStu^v{nL(kQ2cu3=u)VBw zxyu+lK>)k~=+lJmIjd-Dp#0|-2}rLXF#fBHI|BIv*Al8+elkHm_Pq!Ef)Mhz zToK*qJ@aZ6eqp-F?##KVWa++PX6-9 zm-$qzQFRxIjGk2rzl@C$TNUoIu{KNLjm370%S&+@X0|@J0x$PA=)d4EmS9L$y=xwo zl@L)uL6h4aPaC}}EoQL3p4nkZaOdN(AJsUXFFVsu_Pn%?K5!U=C(&1FeW9xu znyB+NKC~YckH0+s{QdI0_QU8-7C?$YxXvqb8%nSl{OXTa$^iebE#VK*%TJVC#4SuU zHkG)z=1TeU3TmNloN~lFu|dI+mX?{9O0&$f-4kxzn|qDj1?8?6YYbMO*o>La+?3arVNaeo zubBpd-5;EwMH&%L_2r!k|9U<|FY?>q=%kRxoG%y{i7DI@y2531Tf~nX&t+trX*+4b z83rKcg|X)C)HGRBCnlX#Q9YySt#8R!OLMWE_lj}-;G=Yi|jUQJ8hy2{!{zblc zfsy(iLpqtPMbxHA_1q~7e}*wT90zxhGw+91pUukoE1Dqk`*`eI_)rZ+`@rAN_bT0! 
z9|*QtC?BMTgX=6AShfnIazk~-*%%BNaZ-dCR-g66w)ExNrhr>_4^pRZX82%UAjMdq`WMS4 zdA};npZ4YNGs~-xv4gu8UwO%dwoG$LCr_NP~TiV%+HiC(N&geG>)9}R( zXfH9Xkf`O&iB$I8#jA%rx_d1Y{AV|3zVr4S;Jqai_H~He`V9+&qOXK=7wlqlxz>!i zW$+;SNx*ZwySG_U3l~ITChG_B6(7zn*39Jt9&#tFyA9r3q{Td=lrYwkg zrtao;YvV0r`Smp;n8uozyK~CdFdOGh+U2|S{O5-OkK3M=2ZZq-Wo?~>MO&efe?Io+ zzMh-(V>QBg$Z<~{&uP!Bw)SfkIe*-!*CRbPRqo>5X@;y8+`%mpGSb1^Yp?9WMAaZY zdb#iIEoP>pdRnFpL?D+3PO#f;b+J=vF_G@}qx+%qk&LnO05D+%UBfw9uI-x(Qg0@A>))cmqyrA$1K<9<;?8X>=1hiL49VG7CM>Hu0nUB~ zldz+06ZVR+&T}KLn~a&rw;%LeO$R^9S}p&qBK=MIcDhkS+sy;es#0PTxnp3LcJ_v( zd44ERuHuQzDP&NXD@W>)Wx~d~e{bJ{<-R@V6>czzZrK{`pHS&{|2^$`2c&J+8Jps8 z%jDge`;s){!tCGbm&^b6Ri3rMV(ZOY3TItP+8&WAj6DKYqR>A~FKWw1{jVP)Xi60m z51U=u^BX$!h(-0WGE_omDSr0`S!`K-v?}Z5?E3wYlL^goH1+9`6=eL~c;j)r_yyxH zwh<1kY3+T@>6A1)@nmKmS%!eckQbjlgt=gYe<+GQjf*l|qcRPS4Tp1iTzI+Q$q--MBN|r7rzD9i?^5 z?`Z$NkCT)}D9-r|^j(bu>{;G*l(vtxj*-6qJLT6ke3s11#M7=GpqgAHe!Ca;uv3q0+mb!h<_qkFBrC$?}(gQ)!&$d0=Ax zIKQ$8((xJ}9AMev74`rHQ(CapoQ&I5Xm)w(zv9#^%iTdUrerHHoLIE=jzI64HXX+9 z-}*Bx?&$j}W>ZXsUUe*PZcjI#@sH7mc;P%LAJ!WeCVrRA8;U&Y*%tzp&h`njM0FdU zez&vrMCh_w{7=;A|zZh=qnbtbVKkBvV z!O`;V#eX=@!uVy(x}<@6`6Qx?qLV}t|6hU`Co|qM%Zg5suCcD=V-OxQBQuXyD6QV4 zhQ6ub+r_v{)unoqNs1Ap%t?0HjJhm8HSu`#8KUQ~=rR3fI+ylO_-ufGNxihB+o5XP zrgF;4i*DOz(|o(4od9upsxPS83}D8Oa(i%nvPwHzw_6Xh6m5;rx)(+LfSj$nN*!!Yp7@txU@8tH?HS)E zjau4^P2>c2&+;H^QivY`h4p2O6Fz-+!h;+6x^Mx7e=6%XjrAqZJod|0^&LHa3`YIXQHm)i<+rEG!v`k1_B>{z+j;#t6b6i53 zN~`tdzgVo7gQq;a+aYSCqD1*DWSc^`?sz_8Vw=T{32aXi6N5}uKDv+ngqtQ`IXN01 z)&?don8{im0?@~MSM&HRvF_|!@5yy`^-P|zs9Y0$CggR5_*NCd#%b3PyGL(pvuiu; zk$Ep6W|W&MTKVMbhz25j@w5p<-7HzH!o;A{H6DzyV{Gu1+XPR@C69(&?b|Z+mIniL z4(L}MtRDZ=)-I>1VB0naPMO|-478R|0nKjWq|;7&FOgTTW5j?D29Xrs6JO12ri2u- zw;~a=tB$Q$tuOUO8#fR&jBgV{E8MWyN9u?#KdkcT5@tT=@Ub>N=oFKCyi@aGZ`7WW zcc{Xk6jl@AX=Ndx;)$zZb$xQ3mtB<vlJG+JB)CRTJfF-DRh( z4!I04SBG5UUj|g8JV(Ziz_-UfCKtrY&PGkONz>YGNx4b2k0^$duFHS1ONCQzzG&S9 zVy!dJ_oZ+qbp3Wv$3m20MB@!pp7uG$}N-)=d@< ze%(2{=uMF+uG!JLX*`xaGm6ckWp&mt(emQuiEd^h(MF48*gu>!QhgS-zB}qQIubTQ 
zr$NE;cu^)*D6}e`1isxRfwlaRPyNJ}{O42~)wgwRmiyjW(l2D(+6jVB%TknP$LVLR zj%Zj9VgX{_)5pI$qR;?9tQ3q8P6CMO2VPZG3`?fQzqI2nKy;j2_GOXfj^7eu(OqAuJ&NKC4ZJwID+>>)>Vs)>`4%8Z6`#J#?x{D!uiWt6>=0zE z`uu!c`990W4!-k^!*MpwGz58DKl{yzZDhW_y3@5J$=1mYD~(pW15SrX(q9;&mE(I2#;hhp`iM(-evyKRDnRZ)P@oe5qnyDz+mxH@211g^J}$BTLx0-570AJhau5un+aDq=HL|(ONBrTvO6b^O(~QfH z!_JrX-z(RYuSDu|rOxqys#4_2O_s8~HSg0UHNgfw^jpV)5p`+$r*B}6otL9B=Y%ZhkS~#7`cUL>10k*?tdMPEdN;9!3CAD)*HnugpVrr8c9JBtJ+P zKNdwzVd)k4_cvN;Ul3dP??=lAXQ$jLzBSE_p@r)pn-77bPJupr{h!s@-d=WjE8Zkc z90+37;-8S}Vco|B$2H>{*G>_LbCeAgtG}GW(A7ri^7QWiBxLIe-*45P%W4F}5 zz>yGd9$$WtBD%3`*fkpYssE$MfGbslt-Vp2kl7}gTaNE2Z6tN8$Fy)^MCpdbheyM_-FL%6St&XuD^jsuE8e;I!Z71^P)#V z9bTU;B^3wpzl~L?8V%UDXZNGJj$2#V@qMs1^}pgulx(*lq%5iCE3qE1?;Ug2JIU*u z-@te`>8UiG(BVoxU^&|G_%~s^G@FE%NS%%((S#&fB2<>U-%=8e-BJ5}awj^RmWcV+ zqtgJ&t=C$cV0H>7aDw9URpUSR<@L+U3YHe%_dyU%*RHCxgZB2@q|3j7lWc>IW>|^mXzYXj`1p$9pNo| z!hQ3Z<1P!d;&j-eBlpxpjS_J=KplJevpy*IrsMK5bz{hOdPK8?$!3~A2gff>Rn&gH9ZG75?E zoNK@>rT?(sjnwoNI?O)-Cwa`^%=n6*8sr(Ti&b;#G{5W%G^;ejZ|4!2bFe!rc_o8k+tU-qugUik`(Q+b)0^fP+A zL7>c7*P&faYGkbba78$xIFg-dWkl~ppy9C`y7B_YO0(Avl=YR?l1xsUcWSQPSPz#{ z9=2SW@2{sPYe_y4qHz2qg#A{b>_&9k4~U+EV@jSm-;?&I*3sRZ>z*U?!;bN-&J(uinNDNMZmdqN1z|gy&ej*4B&~uK zj!h#t-WuIq*eQ6`(@|k7gzEP>6DtJ|)g0dbP7eu{R`}uD`YET}#%?(9XiwP5nda9j z(>_qSw$ijgU~qWX81LJ-%jNCHn%D|tcTMa%D@m?0=SD`mh|{`l?$A`$Qh2L2Cq+SI zy{i!?t)II^n9?kQfgp5b-vmE-x6VYg(XEu`3ZJNXyZ+aGyY<|u$;&_*hj=qB4V=G40ZL>6HQ;<6-23WGMdsl=6e4(bQ&~S!$NV(^N$NZM(^{ai&a(Htdvbk zmL@dNduoku$NRUGZH{Qs5Y>vQlc?^8=Z`0 z@Bb$7nf8BQfCb4*;{D_Mws$`|;3O;<*0Lwmc|>;mYtYbUvvuxp!_Jro3z6n;@BcBp zbmXS3PQSnQlwzf+U(%lMso-F(Y$Iv($he$1afE5Hw#kNP7(H2Mgb9^puU#(boVA%GWRX#3^3v=x#Mt`~OTvj))SqQVJj%>4HzMGvw}X zUrl?QyzfPL= z_HIp@ehtkm_PH{2_og>kD=L}x@OfP@?d{WAF#Q^qS?qgd>E1|huu;@J?cw{ne%jl& zwSM~bMP~7*ZN7BSCtLpXrcXtN)TMK7l6I}!={p`Wp383Mf=SD6u&Zg5Ct(v7(3ISh zBsWBLi1sV2qPKkP=jL)G7sijwTUmZ&Ab2O^+Ic>C7dC#7139O-D2V7ucSrEyTXiQL z92oPq?lTK}WBv!6Y{HB&9^5m6^V`%|nd_CAGbckM=kVSdkM9TaMl(oitu-D)hz_c& 
zt39`dM-P7BP?CHIQDx^?p~49tKJ@q1`^P21h_b^{>6E}ApW;$*@;m?3v=s6wPWe_G~TBD3cJ`JahBEK!y5>8-0bkeR~gk z^8ETdKXG5|lca%z)pTNUxgN&$IgsUs(@*@uXkPv+*vABezKR!63Gy3%BT2aUaltei zK))`#KdEm>Y(HVNBGQcOEq{c@pc;qM&CKJ2Q@G%-u+En63qtwU+IyydIQNPU+N^Vn zo6`;^jVXSmInfH@SXfyZlW+Om3PFK=?i5yVFO@n^T{@$|Jf&J1SO-JF>!aho^+;Ry!eFKY~w^HPV znvMBs5&t4%Rm*SB8AknuA|A`nFMkG(I{w%s{1vBBQ~Fkjo;cB9@;Eg?voHK{&I8pA z#!9NjNo?pKCo*QJcu8}OrS}`%L$s`&$xM3N98apjtKuPS9EDN;s|eKn6qcXuBj)D_7Py4ws?4 zQK39>zV~s(9sd!HEPNxNTE9*>*ILD&p_JO^JQ#FL?rVD0<-_|=ZphMF$2kCnH2=%~ z^L=4F#gYm-sid@YznnJ8qAhv(H>&qu8hr&$fZ}si+4DZUScnqiA#DRKb4|XJpVNa= z8K#p%Nq^XtVHfXfHp@hK;HM#9N=-AYTc^YdXRDjI$c@P9LLZbz`0H1IE|p_^f__R0rgF_VWXMv$QILmM)ozlSiCNd76*KE;NlaWqd+MT^8K$>a_K0 zbNKmV65lNPh16&c(0IaEu?v^?0=I_jYj(22)U3kdzw9lC2P?3c-zF&tCd!lNo5}_r zp%q5!_RU|0#*deiQ?lp1OOKNqNl0rMn| zH_ArFM*AfD|LC^X;ex4Wpi|muuu;F+)k^G-#Go11*6G^EtzD2zhj0p~X1adz_~P2u z+Ofa>7p`MhP3b)1pPlDnllvd3ub7UbhmJN{Fe}0{gy*)_n9|KI1RFoEE3`vTVL|Tx za<#1HreW*5=;8h4q`!CZ*Vggkdm{Hi1nbyK>z)d2j(l7jxmhy{O&4-&JDuBkTGiDC zHFZ)F-V!TZWX;~M>hA9j96mKk!})z=8Ydq@#u&aorT95ZyzzlFX8K6lGbPWT-1a(( z$ZB+$ZcW01_@WqU3@)6uKP@55$s1hwFj6)HUYv`#t3dYU;+pDgOD`_iB!$)nC!)7Db6uM9_2 z*-(Exm5%=DeQL<8`&(JX;;6wY({N4+i+jabD4pg#yyP^(K)LpaZ;wq%k15+yw~<3k zE#w^lEagwCoSePAwVo*dEp|2DXRVf02R_>a@4gT>UoiCJ1 z9gsdw?uMbLNNx9sVTS-OQ~={jwX)P@WZ?X`xw|j9vhxJnMUN?5NK{iB51DWfUL?go ze}vuS9~i^xC=P<;m{&99$O;9luKonkqhfqOLu8D^(slF;WLgRg65m|%H?Vp+8Zoxj1690DCNctF%mVW zWT;VPckC^1ea9pT*VFPgbu~#k5K{+rG$?fB$UeA}(RnTj^kJgHwQ1Gfao`{6i`Gne zRL{VGHS`&{zrJ#%Y52qNu9fATcfNx@0Nqk(#3F6$4LDoPhL;aUh@{;+R zKw0>;>2_ohQ6dw1z+}x!s<&F?2N8S(aN@{u7n9%jYI^Des)B{({8(w9)eVOaw@`|uI`aF;;Lm6bdK)$4U3e!N>NiEQQ=9EOYk*I? 
zBI;qq-*a;Yi4(iF{Ra4(teb_nTL}g%Nkj^TLgburLVQm&+)U`FsHnRJ@{%^%8L{>pQB_oV{Prx z((}Y|k530TLsb;?Ek~DO0}d_4xPG-2Gexy0-3KE}JRe|L-@&ca437sKY4i&xEm=+7vh{wBGV0X1IU4m%4k8v2&0 z3xD9J@Spki_x)flpX`}p2O<~$Lzo);S(~^m%lx;N^2=D@;z|O)q`PriXN1 zLA7+Q=jw7vTK$D#+YV&Sva8HYHzce)NbYFBF<-`;Qjx%#Eq8#cLZp#_8Klea)Mill zI5;1w7tOEEv?Or#H%M=ooo-N&I==%%^P3P6HCR)}?&NPoo;-;Xe;DbrbtNZ{mwJa_ zmOK26^7zGZ#utl}7?Hyc#^)j5U_ejsR(H9XnSs8-apL{pS?WQP``ZCdl3Y_ANpr1l zLL=hqFlkA|Q+Vwo6D%z+LA#I5JTTQuGOuY1RzEel-~L*TvE47lV)A&!MYMbgLfB*R z%bMTKgnJmjY4fywzTrD2|6gE(oA`w5k95O zZlV#7a-i;_yN&5#?ZL8+^IajSzY*iyEFuxq9m_775}d99sSM_9xn^=N15gmib-p?~ z`vY&oVS{XOXk&~E$B);MIbJdP6Gb)`J>r@bmLIJOY7Y;V zalB1pILL|6Qb&c)!or1pQx2Eg;V#tgnxy z(5Gzgf9S!wx)XH_RCmyIho+Q~VzU4gj|{Y$|8{Gw){##bk6sujQ-1ADGfeguPKlD9 zTrWm~Tz@~ftukF3v-4N1nwu1^uEH?@6HefUkbw|bHBR!R#=z*&23&E?UcNq@V!~}f zs$ZS$osRs`%F5E!Ls&!cSY6r8u+E4RIb=&y8yaE}1x|*q-Gy-bRv3>~Pok7wWw^PR zHrvbbGUF`Q&%Lj=^6c3DM zuqZJAI;TNO7ub>KPL(-+8|C}M&^I$fb;il#?f*HBv=}WO@!aiyQ09c;=nnXvuTRzk$n`ICd3I<gqNEK9OduIDJ<5V!L<*)K>KV_HM8fV z!P09<9cez}W{m&)`ze87SiWN$fo-9QbNjIW{Qh8c6n?1sUXYUfi#5LR9tDk#Aft-D zOD8>^y>ZjVlS^kclg1OmME+^x#uI}9^)?h*Y~a?jRL^V-Z1^5O?jg(d@H1o#M7wug z27@qylM~{rC!DmnQcV^%D47c!t%-LylPzX97V`PNBhuj~D3wX<^iv9VnDcBHBP9{FE2nUsDk@fK!+C^#G+xG7`jW2`7cp3F}@W@sE_}1IBkHT5DWB2|#&dUG)Xo`Kpzfib?FxcYzbq0n_Tr>2G zj_OCsC(LX@a}c;sL5&wK#6TNo40UipQ&oivnK4wcAc!XLRF&a$1Olo`@EvbFR%S#Q zHo2+-Jcb?}2I<3spo@cI*tuEB5U(H6-ax!)>{aRELO*b(8UjG=Xs;kKxipZ9(K>Q; zU6aG!X*4HDpUFI0a}^keF(q!)C=QdfLirGcky8a$WjDnF;CP~8sUfwz(3BuZI!WLa z{BKgDi0fyXComZ<6{QTi$3iOzy33*4riqO3fU+QZ!A{HzaUA(po1gtKt@WkUMG z;)KW$(NR8$kUld_d(H(7?1aLFFsW>X5z=#b)alP`x^AgZ2{Zx2yYXf8 z5Lp)mXz(iIg4$sTNRgBsr*qICo&>DA)WL1N@o*+YjPZM|}@-sE_T6 zAWI;-gmvdpPgfyDqyte#s6rpD3TRs#r-s|btGJZSuAbw`j;(s(_B+D58T8x$yRmcv z0Ohz0e!vYH!DgQdi!&hoD@OI`>00&bdCN)Ob04D~%z;2VW!37;6t+uvVhFou;V?&> zSEcbj;d%jAy}mP=AQ-U;`eHx|#)vSPTD?Pc)?_{8JGdb1ULZ3^d1zb@5vH5Xp^pel zA?IL;g*L|ss7M3e$9ivg)sQTVI`tAj9a?Cc5gA_@Jt}7!gRUIdxSF5OLtL-!(T#i& 
zf?7mhl^HffH_ip+o1V{Q*_#8-XS;rxU}0J!dk!p+`eNRQJ%g!dbkYh~jIP&qOqaGQ za-_0YSzv{}Du%i-4<rIm^*_9!WEq3MSU)#FN@g6_1zfMM)PdFB1i~Cx z4_3XXY$2wbAo2?r^(y#)_`wgY?m>7y4S%N);@>!YM9rD>1bX^6Z^$c zt0O*x%9)hdIxx?lc6elFj%_6!j=8T$fQI>%6kX?lb;ucP)~Sg9}jqEs66f_Xt<-P~P=O7x~ImboHD| zDGmOkAx*autSVj$rekESMK-&CR2E^2h3_mI0-r6QCpwj1Iqz0t%t$~try zs)wU@Z0le0FD`;j>68$HulO4S3T4qRRJ;v;3*}I?K7%@?P#e*}Km*w+AC{yXz?%59 z);A?@ouu4dK#f{juLz$o<2!}mFDNHIp_&dc*-{~rC*ryJs1ucCjJn<STn2 zr92Akq>&9aE6bvFiu7TSYe+b>`&}-&7Zq+<*f-9T*T{X%0&%I$C;nA zvxM-r& ztg0H7jP7$JSd`l2Kf3}k>cEnc8rxIF6F{plvKQB?~Hp2%J8*M5(yw%sW5H=@h)RfC0B&m54C>w54>-AlRvJNaNQ$!2v z&O+m$hOEYjC)yyfS=BiEHWAXPIeY*Tb2nWq+r`mWe4pSV`BVANWUL&U$Z{@$C$LYA zK2pmC_&`U(SOAYqQqo5VVqoYYXihYPJTRH7!8B<6mJ!L%m(k;ryrMvhL;95x#1pdXr+kAUK>F^$+gQ@AE4Tl|qNUmOL4cM2g!Z=l}-|(WS53HUvs7wwS zs|$Gd{xjSzjo1Y7CC4R|!fyT9LbM)G;YM&BTqwN!{LbzZJQ81^6D~)D*A5@R#qWAf zt+j+5W>IbSO~{+39H@~wAokv|Z>o?ia04nZ(>NmRjS#JxSlmx9G6iAXVyWbeIVlfv zBs|V<5@t5M1XOKF|Lr|*;f?DdRe(l_Fe?{b!ppJa7YYy+MixgMqGcl_DbViMH4R2R z;spkw6~mu@54Xc-e~b8~hm8%lBSA}hQn;Hi ztd!e!TlcErdWj$!qjf;G*j;|XV)O>IjNx=EE39a3_SrQz0!HK0^pVmZi&xJ@4nD4) zb2Hb0LvK}m)-F|u+8dF)6%a&5a&|XZ7uc!nPWH1FY>XE{`>G1y;4ys_jA>|xICsSm z1k`h!>sQk0wIV~Y1YR}1hM}qv>=82TpB`8mV(pRMwI3hs2wA);S2|fYV?NS21N;=l z>yRpxsxSvaoWRvS9h(ALDj-HV1@ssTaj!a#g{Y~XLcI1MGOLW#<$uZ*LlvKqw%*mo z2&wjgUz4J~;h7)_iy?B&QN|QYs2%9BQic=NX28|4j zw7^&f)?Fr8Bq&Cn6pn@{1m5BUxQ}q_tH5a9uB&~zJ#3bXu5XT%pN7=X10Y@kR)JW6 z0yL7Osu(4kTg<_V)jpN1TD0;N-~d2)GM4+v>rP#-(%mWt9%+r0qfA2@_mQ~$GiZh{?b9wq-3;k+)c;^-L~2+m)Oc)w6py}J?R!-VUG?I_>W2@czcHvRJb|p9^Xqp$ zh}3Q%el6pEe%2XVLP193O)olzo`6_wAEZry3ydXVc-4!n$QJgJsEGe*atD3$OU|DQ z2h2~b7cb`-`JWEWv|%iZ^t?~uE~mO*TnA#6b=ZvdbEywH_E29==Dv^B2Gp7cPvVewXhbwtfJ`b=0@AE|jW+$(=5s!5?yJY17Y zRh7vB6LFz5=B{#(bN&(+AvE~p&5D}GREYz30dmgt`6xEk zHH9N(*UOd3c&rxx*A6zfm?{!oCjqAN0p}xdSu14jVP~TO4=Nqc+B=#l214J*K-2lJ z5Aa;3>IFjYP~MFc$L5VwNPrg;7=3(}n#HV*;R(ls^f|8oS@}X`u8aVyq4>PnYYeD^ z0JdsnTLv3tGNX_jp!6#NH9f%3cwD{)`HiVnyj2LXNl0l;4NwSp$-_&oxk8L-9Rn1!wSt# 
zZm2^ot}Q-XE_SV8rB8{d@n_sabqr?7Ha{w$y=sC=&{5j5Aq*elmp}mzf4XGRp89zh zG!D5zc9aO@Ng6RI;Q9(*W6l`Tr`>CdjmSv&CXZ0WCfU=jjQ{5%jR3n5FxLdc){D|S z*^FZtteo?+T35%&8@7XDj?oM?Dz}&q+-MQWZn(yv7+!zdIOTV@=sJ+Tv-ESg$qci0 z6|&$b@Hy937H~EJua8lpYK0-ui>#0@I6)WPh8@)+4^pJGQLFik;eUxpX}1l+jRE zR{&Hcu5<$w@J73p_|1bY(d&xm4>rFrsw&YW%5;#f{m{K}KX|b*o}za6I5y-%EOS;h zQp{MCQZ$UdUc#Y)n5unRH!e89Wli&T%#eau0ueMg zE`BT(+Tg=D&KkP>Z|fs0;If&PA}%y+28{y^F3>d*1oUr%1foIYh|r195DTJdm?0!J zIE+>I^kKv3h~26a9B6?15*38Sy^0uOXNcxHuZs%!PFavzN?13A_@8Za4=o7NlG?8U ztQ~Mm1K5?swpdYlGiL~ANyHxofgPY@V(`E5BDN?5?fR7u-bN-$oA~@xHqHVU3d+JH z6U73A#gD>v8)WB|w<4-8?kNQ&0nG7V*H!czw)zwVoDQRQ+%CHrO?{CPMO~}NChXkj z%oNcnoEp56nt*s?tbvsnVt1@pD_)$R@Gd~$_qKSeAh5M8UJeMXAQ*29{Q{(oQ-?}W zHB~7Cr1jZ&;+PjuAHDDnfHp)b#{Qxcy+Khf$S=L%A2$AT9OHH za0JY%2>?0vFaQL;COg-}@Y_n#0}M37=m25k2I(tatgfh?bt$SIyb@EYRIY{8Px>!y zRO#o61^fqBYEm^gcoCuxe}iMiqDIz4pHsbkXc1u;l=cbFHMSrXXg3YM zBrf2rO5YEnLdss=5W^;uiCS{JaS)a{~d$c_W8X1K6y~n)&HVQw_Eb68+p4o(nj-hdTXKgxP1j= z0EL7`Db2(GIK3YrUhLSP{h==jTe&ol{gOh|h(dC0fyC#v?H-gWCB*Vs7h0)XWL^23 zAF+0__JvG9NC_}hm$``z=silH#(wG^@YRE;e8jVV0aw}3UNEkl1gIhz11WJS<00wiC?u3d->ezR*)pvo%7IL);ub0qZH zs)CwXnfNS_(xnfr#1yw>hksFb1wi`zd0pN@Vwl)asYMP~%r+81r0|I!T1mi2Xi|A^ zIuTa|HqH{nMr<09Z)S~ZrP|6O0xY`U!0B-KTC|ajSUVDkUT<^*Z6s=rKn;?PDlA=j zm%o+rDH&wPjVJ($za|Gbt9m9wgI{3TG9g&hWAi~s zAV_p|6gACMTWA0qm32iS_Rz`RsUgLrdySD1?R9G^OjLpxPY!oz3o9|JcjHaG7 zNLDS`Oy$SUge7P&iH@Q+^5wd#DB>^{@2ncEt~+iAr_18%tp*F2-cbS;yyi?${1=~R zrOyZI!hs;tq{XQQYiN!u!C#`|wQ3;wNXNwCZ?JU#Br4a4CD}lOvuS>gx_ zNJ$Gl#3lhA2Vk^KtTy2E4+;@rlcPcUG;*LTecGbl&Ey{u;m4^pqjkYEjR8HMt1c^N zOC6CqV@C*?GrkKX-bowSuX7A8;;~ft?7kg|O1m6`a3N}{emF9Y*a7hmE52ZKS_5GfpZVLC!q6~w;- zhHCLDew%!zP@|-GacxR9zdw1}l~YHXrNQ-PO%`5yyfQyTQjs3?y?p<167lxuftNz? 
zFnAQ}MD)a%&N_o^7>t#tDnUOQKsXbX> z05hHP-z@)*p9%+5e~;G9pKxJ_!iT{UA!a`!kiFsuUjL)$ssoz(+VJR>?(Rllqh)k= zx6&mIBB*n;L%JO(W5j4BMM9)RrN-z`z+?kalwr)@*mu8w7x$iX&wJnV#`8QUJY?qI zM)5Vn-c~C4{p4GsS#IrT z7z>1F@^b!fy*ID_X%CHK!=uyowby@KeEyU1>fdEYDtQ9n$k;TR)m}e`Yb&~+LWd+U zh~MH?rN95?h<)ExHm*)dr-(2{sYd&zQQUQ5+tf5Qsv*sLLpO59D2$G2lwQ4C6KCU( zjM=M0#niup3v++}WnkNTQL1=*Z|Hh7e@{9^k9Z0-82;X`!Ne~-s&6LbT-HlR&$Bng zE0g%+;t;_(9^b^6wr{J5W^fT#+B4#)l}|)d;wmD z@MgsCSlaH*`AaNjWAG~@zqeUbJXR#mE(i&yFupWOxWp-Rlhbw_%hYpvA+GQd#hYcln(Fo%tIz!{Xu^!*xB+wWm3>hHUH|BVOW(C`24Pq0@jxya-Ov8JQ+ z1oVwFem|i;hB;;u$Z((9GpHBS&kMWP2jpvN-~INHzpunIotPZZ7=^}-Oiy;-hYA>)ZId5 zyZ?Z)Yn+c8D*b>mRDJK~Oxru`*!ee>H}{W^3bJS94BWez^LsKQ#~9XjY%fA_;y3%N z@37nV7XKA;e}Ij){<^^a`9L0RKOoDepI-RI4_GqRi==#D>8%Xx_%lhKI6V4ygwIRp=iAiRe=LC3nE4dQkl1#^6 z?`t-v=gM{YF~nHbdRR~CKS?Zxa8Y@8Uqb>+Ir(|U4?M*^OG4hp&4Rh&T^pz_smz8A zAI8^pab!9UmOpg8;^JNX0*+J=lyi$L!hvy0|1#ke=TFV+*?Hr74&DmPWa53uhWLZK zGR3n2H~nVQ&uEEf{J?R)Cr%J#W({V90u(wM3+H2_J*E%rz^8T2l8a)jy}m~4s0O_h z#t)a`y1+uUHQ|@CdVGg$jBMVKdSyy!+g`%%_uT!+Y%(?pFerr|0c=faWrUV4UZY;vQylY`{%Q#20TP07_sby-7F4p4Fs_f$`XWwoZIefHml)##fWV zh7Bj-a~RUmbHq1CiN+L zm$aNfIX6Z#nT=|H&<2sGbnim!IC5^#Tb?%QQA=;=JzdZ;1Ld4w&ivGrOAD-VXik>z zQ0kJv%6}kkO-4qJjczwwiI~((^dtCJ31f!DUf*dkp*0e+`i{vRc_t2%uOO}sz0}OI za03%>xPb3}k#Qi#V?GBlqgp`@V3AGrO#I~;xq%5St~sAk+XplL?9EvxWir0yTqD7m zXz5Oa9}V)f52*I>%rLVCJsNyQN05~5+W1Q+cn7j-)5!9HwzOx-Y0H}OCrt4}cEiT^ zY_6H)jGiTUpiwO+BX9>pjV$j=92?tK+(smjL>Glh{O$l1bHsiKkn+Nw-lhIOcLok1x~K9TVGd*-Ylmwv0*l{ugFV?c@J)#XLKs07@xmm*aF-< z1BB$5!VaiW!;CwHndiq^1mg$Y)k9VHX=pY&0GES=wyK*W$9dxa`3khdz^1A2o zbTn=hOEmzt?I$6_dXSu1dTIPWk~R%%snv5RO}6vV8L#T)a?-=kjt`(Z$lha0mH82z zYRd)aXqs*sAqA7jW;WEDhk8l(Ne{2YljXIjF!5)^aUyaqmdu}+wGkmNvq1VDT+)Sw zO0QiEMppdVE~Je_m2o8fLek3g$6qt^A$^s**Mf~O9$q=*vPGQ7vK3T8?`;EBfH-HW zF1U71+PTprw>hk?g+U26pTX=#`o%2ZN_v&;SPA{NVoXS07Ih+(d|Ws|A-LsXnBev1 zcvWFJGQa_7#!5YCXohRh$PmtWOiGEW_3Fh$_$;GEio{BUf~*pAj|}mI%KfSg+v0ng zm$kYNvc}1VKWJdep_WK!ol8p#oB`i+?i6O(jdk;FIKND@+7*9YdCCg<_nAMyTJ)kR~L~~~{l%0p(TgKL3CbzOc_Ir<$^T+nQmOA 
zREc;elfJ*7HEqDA_HazS{(_^h*zlDAxnt(@Cb*8mmOiN_X=Vu*p%u5KOM08qpFoTi z=Z+DEJ$;X`khZjV0%vwSSb343R|Wl-k!gd1>2j~Z>G?qC?4Q=j|F*MU=00!4hCrgI zrW|n*hF+fc#~rQ))!}y~T?K8QBF4`(vUwXREC)SKCfZ}4t}2KPBQ!OQ4=io%jf1Y? z{r0PFps`HGOcFNIL7>h84fcH1^v_Z{$@SW4z>D8Qdpz`124OF0sTdX8)W*&jc zE5>wu4$P_51KuC}{}w~1gXk_N(qbE5yXEZTciZLG#uBDF2GH4;>R)Hl$Kzf+lw%@RULOmYxog)o8Dts+1kEyMZ1ao_PxBL04$hrMg?-=MEA7-_HVkJw0^Ba4+R$;v%4n=dY@za_=@ zP1;ee?QE?aVl)TGMM2a)`ztHvF8LyH+NqG`1jQ8qRnDS#Rzm;==X zQFv|6xG1>U0*@+^W;5M~N|@bVhI7GdvoGQs&^mYu1JhzFMT!=02-EIw(ZGXiG2;!` zu=*4_G7Wi|oA|^mnu!#_D3k?dU&H~(3brRF#O|b=M!H*>(2yTfE}|QLjuiVVQSWhw zmuIR)VJtk&E9Q2^<0li;^dkdrfi|RLWH0VeUa4mUe~nFigOAMK>+MT)vYd5ojQ23; zf|8rA0EWvB4SbJJrV8gXo*cm$i71ZCj9TuSc6$PuZ0Qb!cvTA+x>bxI zxghFe4v*(@PbRalB_%;@c{PK^c>|~hQ1t~6DZZ0QIHO`FE`H0s9yj8PX@0)@VFEWD&+BZs_q{u5KSK`E_gFpsX!7kCN6jeKTR<|7)f{c8ws7RG=|*Gi-Bs9N z%AX6#s}aeC^MyrdY(l-a0O{=%^RHZ}@V2^Is;1EE_t{tc)swBPeA1Z$c30`CKuA3Tmfeie45E zO4{rzp!1Rp70}-*-YbV4GP1B@lBCVP>$b_t3*4<{3RsSZkjxRsLD-dtP)BBc20enG z_R-^?+Hucb==4OBx)@3zu-led!TY~HaI8|edGC_h2_KJlsWJ{$C@kWkgfn4JZ<0K< zP7BtKHGky=U%NbPHoJJ2LSt=D8lx4lBmJ`Dmxak?nUs&8fbz>OnjTPdpuo6 zgP~n;Fv6XX%qP_|ZWzegcrne0UWw3LQY25OOSfPxL76Krctf*96sKC_yWdJ`T2XqX zrIG-cf(b`{Q+(C332iZyqQ|L2f=^jD7hRB_NlsZ(tYtegGJR^se*}sZm2oJ3 z{~nz+D=e;w0i;3t-9)|23_CVwmEa|@YSC7j?lbQWkcjC=1)lUUwX1_f;t6%MZ#wB| zVPBXDMPJ2_m#7!+$3y&{U9&+8yBcB!RIR!t8f8sTFPz`YnzCeF1Q|D;8>4cqxlBY1 zH3q&Ceb_j<=}EuctO_zO%Gw5wi2FIY8`Q9H4V-HOR#L(bpN+v9?>r><(Uj(qDPJWT zu*^9@7_3r;hVi}>rzs}>yKxcVof;kjoBqaHgdNC@;`B}B+=rm>H5nzE1^YKm{aSB< z)cTRphhp@2Cqi1YH2nvEFO$wuAriy}W==Su_ssM%DRA7sIL+d8AUlbU1(F16P`!Ac|psg(}5^9pB(}Dt>Ii(U!MgWPmXA_0> zE{K}F4tL1+;ga503)HkT+PbJ8PkB4-MfAyQ;cranrvc**sl!hv*`;xZk`F{B{LN9_ z0#oM1ZhnPvQEUOL^n-ijqL(IbrUmJHY#o61mtQ>GY?A0nThVQQ6cnQDIMxt|C@wf{ z)=A49zp;AH0}p1-skP#>2kN!m|yRu1`=%b>S=vdW-C#tb6y7$Thdr={@| zta-xdMdF_%@|*vLMv;RK|b&pdwewQ-~$3K2y;ab-V4>iV>&K z-3Al7PbL(_x1co>idzDnkHnwtl)X%;6w!A=Q4R?7BqLF-Pf^ctLw7zzqHz&&*PS_4 z6-n33)_fb`)B^(uykBFfCp)Y_ZMhFk(UppvTz=t7tCX=Ugr>-54d~*pG5Og!QB+c{ 
zXI)e+Q|IOIJ-X;#o*HSw#wP_%0)0(C?LSwW)Joa9%3 z4HbQKiK>;de5^3Jy~&@ehg6szOS~Uitc!!>^wj$XsoEy|Gk#aj?jKOZeikE4uAAtO zB2sc0eie?t``a5rq~w@#@SHv)TlTmT_p^kAEl^bx6Q;OBoqjFXEjdyR;csqW1>Vn4 zdrO*B&6lm(h;?<_0y>&h1ryWP8>ELPxGsB(krwV1zC*`fkJ}O<{Wnqm5)G#<=HND_ zvFl=j{W8rGlC$qU@JBF7y-xI@o1V+L;rgjaHEThoe*CjUq%;8AjdG zE4H`kNSCFvONE5Xa(>`ZAnA^oKz&jn^<)z9q*kQTesUrk=4s4`$mF6U#ul`Y-hN{2 zg)^=^B%D!CYXT@m$<+u+&-)~8rkQSRz}*Z2#YV`!bj@3Xcp?fp+M2k9j*k%U@yG8< z#l-Nzu1*&7SWeNtPrCOAO)(7Z*m0Mnt(CaG3`go7o%U4Da^v;^d?XAS`&w< zcw*}HyM)0{5f{CPAb2X`V)4%;GvEd|V>rkKi8Iu=ArHr|>|`U(P#Auf<#bDoWHjX~ zMtWj)?;X%CvR0R)uQQ`SKK1GnW9y{8vH{5j*G3+6SL5ZzGQ3Kpn&G~RJF+&GK z*7$;~9u}hNZV)eiH;~HGWs8<%PcLB5O3@o>leky48galUb7eA(IP6%aYqnK94oe;` zz6fB{N9n4(sujPUAVPkjK|`Yj^boW#fjXpW)}LOA=EK1>a-L1`IQK*$*cVAAc6-NL zfnZW~wv@jms?51XZwf=j*A#ybsSkcWIS(fG4^X`qpN{ej0H=c}-yHrdO3jo90b2q_jeCD0_4huCe*hAp>rN}BSwSQt(12Xx zY|+ekn;s;$!?==5AMLFypH20dnTvt=osNT^h~$@bM2&)ITCoX;9~i8Y-7^9_yQTA@ zb2f5y$p2^H>QEG`_bP?O#rs?@Q?i*?6_QDvmnGEMS>&a2wWxaME;!) za-hUpO8gq0s|LgVeb)VI#Ku8ImY2vwj_0nv1i|V+)PNwr9GxcYwuSI&gy^?5c=0_9 z^jc}?k>3@8(;Qf;Dc4s&X{ikIkO|l7Ay>|eizjVUn}?Ug@aMx$9czcOSI3~YO)Opa zaV-ErfM_4Xsq7Vw_UFHE7}cpw5WB0M z*zG6vuB;68=|sZrHofeMKb}~D9v^`J-3)T7hKe^K&cCN*S3wV%KCVaThG2-Qobt{j zr)?XQLz)&L*_W;Y80d8Q1)I954Ta}zIq_O6qM0J?XhC|?z$6K~>S|I7{hjd1YG173 zmDRpJug03uBGBQl^Pw@Oq_s(|QN)G)Va40Gh$x<>_bP#FLn(;m^Vo8RK++f;=rWl< zKZUT%+Pr-6YAfkiH&QHozm{`q<%LkQH%0kjPz!~3CGEUh3*3SYn@L+4@^~W$({9L= zC1)o}*`SI_tx1Kn99E=i+u}0lKS8uLCCVF3i@;pKjSuMVZ!o~V(G7Co&wjAm<&CRy zWxg~;eQyZF;!%u(qZSlboh^Bid_#_swQdR2t1NpLzpDgYuteP}Ws;%_pMa|cTg}ma z1I#~RLS^dhqcJdp-)-?aE`TO4 zuYG8w2tXpEfQ^#I*N3QA!td)zR27r#RwNlsx5`?e1phSfT2LG+RTW5MG>qNyKudEc zsSz8gAE}5t)Mk#9#YNDIS+5!;$bALVMBagoPvxqUE7T_+Q-mue&h?n|i+Rp{=9n-P z-iPDFSRUQCta?qfm%T|f!B+{pN|M2_W3v)&x*j5YvPKzJe87C3?t!+NChc#+UrjQz>B24;qwKotC-{|5c z?Pswc=UfP;Wu2R#J~vPHXNH-eYCve4gO6ESlk38EmZ*8gNht16+)G^$_K%qJ3g7?W z3Lo`DQTqhJYEgzBURd3iMDfT3M$w1NJVHt?{*$J}7xWw=w+eey8RZP_ymrbHdL{Vn 
zeO3@2Wy|^k08r6SG_b~@;f=D*4gTKy?vo0{ZULy%WoKmy-C?*nR7>cZi8#&p;^89a>H*&m1bNazWr@sSvUL&lXV#NV?1EU#W zGm*Ums{@<1$FE*#hodz{FTxl_&f~wZ0+{&A{uJocTN3-F7sQfRy>=A8J^(6N-O`OU zQb3;z($a;JD_rDGP&w#+tUga4F&6jLmcqr;8$6o8w4_1}Q!^{5TBUG7{uD!5R09o2 zSIlXySnXT(4rk_n#P6zD3KT=xP0A(iy~8KRy}C|hrxoe4L}>|V3X$(6=3w#3f?8>X z7uiAHg@9j55`ZlQGSD|+jSmu)%9ndV$pF#=kz)Lc^TvbwP!yC)V#=AQ%3$2V`PCeC zQkq$Z2aCz6@HNzdyUbBY8fC>o^ZgNLXd_%wSr`{#5Qy*%lp^W$Su8m{LBR35jq(gM zj5IOhG9kQ?6SKX^c|KmORtJfa^tzMzQ2?$k;FJ|MYfN{sYQz-pop+HxegI&*xL>Lh zeW<1Xuje_SV9Wg&EVOWbT$WpQdnnyi_m|Hl(KgbZDe(}>4O{2c zA&yhz(QcraE16v$apVVa*?{Uq(X0#?*U;}nse~!2jZ!;JUT>MxIhdI&!AYL!)mGS2lpdBMo zOn>|%_n==s2(${!Ahd|HShnF>mOu~IQg;UQ=nS!ZEKc~&P1v4kt{ZCdZO{>%S=f$DLy+1sLaJrJ3Jzafh)=8@@1D_J-S>RQvSMrU9hOX0z4fFxQ-FD5Z3Rbf*>Dg|$!#C13v`d9o{i z{uOn<5Yj`8<&+1gAz8sy&}5D2PI#a|lWH+3tr16YT0# zmKFX5f}Rf@pTBg#LE2alVm008;>4U%`}UQaTZ&F9BLm#{Vzd8> znRTgH)|RQ-X{48pYK6TWDLiY5#{0G=!N#bUJzT9=Cf3Z=h|44gif4Ho>D7m^BX~Z0 zERdqoYm@7ywP3^ z&}o_wRBVE&!~U;KDTjUN`}?q^>4J9X?zhq-qf1xaDcWNj(AyKV?i8iBtWf_UZBq4X_hTalt$_=Ig!*xEePb6~u4`w=H4lh1D|-{>6T3j_-WxUEYip7g5haRAgQml&&+|*96@gM8`MD>{?IId(5k3H_=>Rqo5xx#KT+w}JA^1if&fiQ7F@bi7tzuob?aG18k8 zSm6RTG*&>xhlAv~B>c~WauZS}!M1C%pR|;;91+Cu)M?+eEEuT7CpEm>$SYf1jgZgV z$Qt(`snEf8XP8wdUQggN#J*ZkI0rDL(|*K~39j+R`2Eu$WfCel12lUeL2?SAV|7fP zucs2DE&SY=1rzoJ#6uGA>fH{0!{%##o=&gl3|m6Kc0+Yad|ZP+1a%SF-A#~>IDmL% zmJKaSsEwM2LEFX#}aP&1`BSDhxCi`>kOfJiCZYlp5q8^i?8iWMz zPw~JR@k>3HB)w=qm_o;oRvvVq;bMKyBJqAuJAW`ym6Uhvgn~Q!D-#82TkLY7n+cz7 z(eaCrVE`Ve8Trf_zh5NH&p-wbC!G_-eR5bbpR35iMC6-q|uxyAZZwzcx1J$Ep=& z+mskP($@8*Z_%+%RolWpUB|-1E4I3}&j52X{R!)c{miD9KQE(1-TSD0KU=QkJOjutFY+Xj!JprDa&~?s|w+Y=>$gzkaDiIKTQ}V^#o2%*lC43pe9h zw(Y%&@-Te^;dZ3QxMz*+jZz;TM|G&xeuZ5By}@G;Mo^;jpzY{G7JjMS4(}%(CF!Jw zskjo}auf^~%tfjpyHa=5)U3e1oN%wyT1btOL2h1Xn*(M~Sk0OkZ*EJ zU+rQld`7Vm5dC}?nD8cd&Jm47=SW_NfR53kwH->i{DQF!d29qy(Fq#T`3`buEMIQ6~t0M^!I~xsFBCH%xT5XY!l1 z)iKkz9zUN+wSO5=anyQu1_@L1Fa%}-@l}0p|NMrH%7_MVl6p+!VpT`$GKZ>%1Ccq= zD%e4*WJb%P38$3iAXvNPufR6A9_JL-n{Tbt-L81l*CMx=Iw;gyIV5c0=EK?XOzItQ!pHiG 
zTcmEMRsqXV>t+s1iF^uT1CAh8v1{#j(BOX<*UE$4a|7!2yRa@I; zGe*7Q&hwYpoPyn1$i=iSm&SxIIYVB{H`+g%3fNXLblF7K6L{6SUwx>px*1}wWB&0w zS3#6#OTorc?YzB%;7K0g<(5RI*RWc?mCj&`b`6^rpu2<|6soQ7tMIJDMf0~0&3d!$ zJ<#to;q^N32-6f=v@nb4Kl@f2dC1wWB2+ZeS+CSF5{_4dr0hg1p8PBq{H51zb-DufvcODaZg$}v=6@$(>|}=|0eY4xz?}` zy3iaW48A_l9+v1ZBCli?FnD&wWetM?@91^5-(+~`D^+3rp;uw7-$&mppdUPlGH2kdO|io^EH^S52jRv$EmrDV(O znq5~sVzs<{D;7hKe>`ghujK+iD^+P(jL~`1YX5ltywyMRrkq(AWD`)eIebZ1=6qax zPDl;W-tn>Ls!+SX<64tmZ)&EfZx_bT-z@dz8)th?$6}~jT{<&c3+BZ9NyRC`ovSJ? zX1G55sAF;YfkQz1{TcO5_b9cU7*DhBMTFVM(ZFkW{qc4?X5hA4-rPD^79RDjaB;c*fR|8zWPhXOiz39_ z%*D53!1Zy^8eB!2P{;d%16qMmB139} zk86W9Z$7Cnp4l5*bgqf2Z%N{pQU0b>W&v1M$9y=qP$RP4XVm(+tb1AX zkz!hi-RD|!p@7=xCY--k9bnwm7{W_gbg@oL8R8-HUhSBKJV_f zw__e79U>!Bm!k2at0^HfAKY+heq8Y1*8bU7q>_iKHx`g~zdf{?_7*?dK2m2T3>A;yD!U!DxYcctH1#>p6Kb(Mo0|*YypGETz`U z7{{J&FKBf#v8}rc>W>b+Tkn15{&-lqv%THUv#F(~oxptLJ@ZbWNZUZF#`V>Q%JQw0 zFwOOrjhwvY%J4@1G&jJD?H6sk*OfgO1zw8*uDw-AY`bzd6Wg26$k^1(A>mPEpYQ5m zdn*g@8P0LnJ~i9MyiL072vW2}OnIb*maNbQN9eS=S0JsLSG;3Jz^f0@YW;GM$_@Kq zffgYdO`nt}g>QJ-JTwTHHk-QP7zi4@7T$?dL+%zSRw?spEO@xpEqF9Bv9-6cK6+R0 z+`bh4w6?&KTc`sU)B05j=|8;{9;HvtXIL7-?$Op@CxUmqglJeAvnxtN*8z5qV3rVtp!l5<`9qnw$Kq*0B}i2)T>&X!W&Fm0+g+JRA2 zdfJ6~we$OR7a!N|%8CKb|E3Fap=#y-&*rANT)x|+uv?!`f&Fi0B+p)QHAbyc4kBaV z;B)dN%eqbGM?2%gTOCS51#T}oSDZI(xP?r^z;(cuW+|O$iGpb#4Irm?e@fEQLh!t1{pYB9PM31Z?mzUG$BEN_xn~UYUcXH;Bh8JO0{0a7vZQ*uN1M}dde!0#4 zqINCfGpORGjM~EBBM84fw~*%bXou$`Dd9WO?I$0yj5hUv$u0VHmSw2cZ<4TH74dtDT#Jx{I;(2t@@@|@x~u=Zoat7 zE$8*I>U0+IeNZ3q3W1Y+oe0{5aa{E9<>a<9{ZD+$ut}WNLv1FOUX7!xwTGd=mo@+m5 zB~CrcW5>SgW`%O?AfY45HyN#oM!Sd4MgLAs330Y+#69CHIrO=$WNBf@TQ#S(VBxZU z1Ndup(yi+DOIzb$7I+@O_LtPh$JSMfbK~Ue2^9g|k|O+M~kMkx04d(Pszi zWdgh&JXhTZhr>ITOIfQHqrEoW9$SSz5H^VI*BWM(D^XK@1Vp0i&$u?VqEf2NrIU`@ zw??&*v0q$0(30T6#pt0{e>pFR^+8d$6A&+Qbz>@Dt9xlJuBNnf05Q-Y^2@lfKh@}HY#@PX&5&33e2Y3iWb<) zYUYp5efaV0i(0Io9bSemFU*aYGAi$ zT?pS3T`BXn=Xq^z&`1)O02yW%%BCySw8MqngQI zYFhxSSPY;0#%}1c+ocV@toA4QjVoHx?|$Rc&f|Am9xKdjYn(PWENJ(()}tYN4W$#; 
z%>YNxf-yDI-c~gGekm=a6X)%p>|bFMyjt|eafiG7<`Yk8PQp%TxNUMv`!Yv#2R=R5 zw%3E1`OWC78y(RB^zr(g!^`fGZJtfIh234zvN`V}?QEsTl3(I%%{(@d%xsVP!7cA( zP<4aAUwVCP4W!!JN}t(T0727BjB?YqYt8?Eqg9!@=YKlu+1B_I;eVC0=ozn)&6N@J zmLx+iq?&_g^0qTta{DVJ%Ikr-&=bUzu$`OMn;S}LPCGso!_jTa&P910htC1+VC!b# zuXHv35cf!hs+PWrHb5e|lgrj$sla~I;j|u73m~C$^VU-GmcBh`7B=A8#M{^XxLFll zZr)O`6Rsu*tZ);~e*1a%e1lD$Vt1+S&HHIrxXR+98zJj0jizRrb^31K9e}BZKjq;S z(hqC38P)%wIqWO>&bu1}gmb!s)%9w?Hyy~x`+N1k`YpWkU@N;Bq7>aE_4rNLsZvEJ z4_BJYTHcPoyb^E{_Il(IdENdDay^O-xSSIa^2GJ~EaIh1(qH$`;A{w>ym!-(t2?PC zyl&n9DG)tFJO)zZ9dyEhJr-ds99^e>)kpFj5;+n*fJ{#Z!f!{(>9;LOu_L{oMlg(H zR-fhoHyECb12RIP6Z?qYn3RzDW{!!JT)C@3wrea20i~ zg!TW5Q|WHupYJ;xa-G9K@ZaWX5Ha|+Xg&NWs(*2L zR7I@D9|)Lpmcj?zm7;Pn56pi$CO>^xuJqV?)7oF@N%=DagX|tF$KP*`YIua3#S4f2 zKLKsKl{ks&l!JuYVRzD}i9kfm*IId!O=R`>6f%gYxTgkd(Pl@b2>;asNLFpxuTGuH$JJJHMXxc6+%(JNXUZ zTx%V|uVOI52LBf{oN4{Q*`5;ebZ7WZUzQXf@>}|f>v{__8(N_#d=&b&NOrR&>Cr8H zuFZo-zP^&#I@)g6ZMA@)IskB9vmbC4PE}uQfxxtVQF~P9ZIOd&cd7f_`F%u6$Xv6B z$El-gf%~K~5E8a6FXv<0PWDoDQbV)-A6tIZKX-Q@v6FSV8RBKG?G>HnEX@xg@|;fH() z0aM6jbT3HM+m1F+ZEUI(EWgX%*cLs}%$37Rt+hd7x*v-;V8L}r8*%rOFix(5vf zzb8Q1%Eg%k^AZ)`1pFqTwM0qh)Dm>N}ocN+#Du&kdeLP!-upIOZe{<7x5aC z=3n#XTc(eJt$VT7Jlk^ERy5z{1IIk7$G_dY-`qARohv#z`el}VaQYAL!l|EObvN=q z+coo4xAZd4*SrkjoUfVf_{ht{*O=%KoSyW+-!(w7#LL+m+9Km9+L1;#=RIJW{qjg5 zH!MJ^#O+NB*F27+#@)+SMLf$T+h49`jzWm!)tUi>3#VxS$wfe{+qUifsraR2!*>a~ z|0T3Y$u(`f=hZ;UTq<}KytX_;J*egt!pr-U5Zi??3*#Ej5faLeEVXQx0AZEJxR>Z0*?B!7K2;2_T>WM34uP*IVQQ>M!3r$4(Wu_$@11EQ108y zZ*y^yHTl%1CZ-~Wo&sjf;(yqg^qnPUJlNH3-&$Ap*v}JgiXZ;drPQeY6{323@xAPo z(49w+pgwaOu1j&Bx}+jF^{&76FELAkbKZO;CH5Jpw?Mt;s zu0(9{nUNrbT}j4DV)`%3DjpTl26z?-0W&xY+8P?lM?x94(e@|@e2_ir41{in%WEdk z{u^`6C-~_`+<`&g7r0W=@7lmF(L`(-i48W58y;Ye_w#iV$l$W+?)d!5KPwT+z5yir zyup_Ed$1Uouq?eVYQ3xM9C?(f1S@#^^9U?1_<%@}rIB3tlqH#_PS_QVx1rpvRUm*_ zS)V1E*zZ3+eiDDv;u^(Pq@3h+jdAsbxO6Japns|jD`cH{d{&|~1T}M}dH|JvztR%F z=^X`#ZFO|RHN6KfB#qy%4>A7-0A_hC^BSUnCU3ux<0)f&8xgrhR|&m-kjd{r8gj_% z;%!LJC}a!e@p)Am_(}|Vou^4u1D~!Woyp@E$VvKjiHFq`MLtOJj?p_%>7j#-H_2v{ 
zs;0k1zvNPQa8EFeR-Rsl<5dRqBUSclgjiSIg=(A0c7#MEGoJE>wU1(?-*NE~Vd8)) z5op+oFob~Q_hjZk8eoQ>mgA7J{N@B*3mjD@dP7RjFMN@MDI|#I+Tt-zxBWz;H!ue~Y7a+^y zvrF(=qL0I5miE_&sz!Hh?l&Qh1!8)glQKxtrfgdhtOED#W9{ic%jNDSJ&)q`vT_u! z@jTa-ry<(x+qy)r423?dB&uFsDuc2#A&4jR5L%54gfq&m6wSas2tCYcpn5d4AaU%c zY^t=J&J2!1o%PVrk!pimlQl^;nODEfflAN`CEII>T}^>hC9* zX8uq)qDpec2)oMM&j?_~o=zygNh8qgEe&=+xfpoDu795C!-{{(_zJem4585byPRL6 z6P+C|l6cdOEfA5mOi}yPt z4>+F-7U^P0C3KSOaF8j-GTeE^et*tKr~w9{35yTvjkA=o(3g`JGy)Ha!}le| zWnnCIw8CGFS0b23LPUlYzt6R*Mde&Dk>0GYRO%!QLR*XoeD z?zMfO5!+7DSjr*zDe$GjFi8yU*s&{?WHSu{$*WO*^HB2B#JSHqx3xq51}@Mlyy(0o zK07Oo0B5-zK+@)4TcIpd4PH3NMUv&u!(qQ9i@PX%DbD{=?RWzHaXamoXqy$e~VE$w5ok6949z@x=RIvErIn6Ci}- z(iV04p6tT%aV<@A7XT>sQ2^!I43`!2Na9ln&IA;VYG0b{-%SYpK}cqFq>apA7c+?Z zhGBuDjrrkS+(xWZNWn_u^o&jb;Txhvs-~7tD}0xMK#PAh-**JGMi=E9wzc>Zs0b~U z=?`>E(2n$AS#D=iiM|+<@tjBp+ugLeoUSBk{?P1Pbi9Uy1huGGf=7OKuKC8NpbW;; zqJsQ1XXE>Y)Cw|z8g4Sf)2V-LPEC&yAiAK_QViB5S;pZ#z1=e9lwBABcZJkUipXYO@2X0H}jOb%uMfuv&#*+YZa_sN>hcC&|1X4J3YP_Fzg>U_xG| zE{2HAb!RLehv_%cwv;hTRDWJsRPW1VuclopcH~APUp`>&?7zk z)R>JoDp7l9x*vUN-no1z$GPJ~@w^V`n;I!6s5^4F)_wvpTng>MrIVbbElM~YGo~$g zBTK6VVfC89oU2@o@vR4u@0KG}-3L4YO_0C=cf9x|Sygt@i;SF&`itBI9MEB;Z=ka_ z;cR?qjOtbQ19Y2gxmijsHjO80o3H_|d*0&vm+s*Z7M2tT37-Jsi3_Nj2NuUEs{Ycn z5v#Hg#0KcU=y@9xuVrVho1qMPvRcgcT_t8l&{(fe-f+_8Z{DgzCt6w+9@3qOIRN3^ zE>oPxxz;0whv!OlI#Lqv^rQl+L@3@^nIAGt+>lvD+^iffLb&6B>aiJ0@tIm>@AY@@ z|7jYF$+v+xt=L(EXw#;$q49U@c_jrm4vsQ2;+VMj?l#7UTXIarkRR8ovt0=~iCp17PL@g+wc6h*Ko&e*if^#=o*i zkznWauft+alQWGrSYU8Ni7vQ_{##CTjKtuWq2yvPDRBWzm#99&fCf|nLS$69u*QfN z79e0ipVO2n_U}U*W}kbcus#D}0Er)$%M91?01)^NA*fIU)2h4!GDZZg^f;LVvS7s; z>q6gpaEBLDKvVqa=L;%q3VrJ`+VaQx>EDDZHjkB^qkt@yg#MG*z>d&oG;+y0EGG-p z@(bhfsDd5%z{$~`S$aSOt40nxOfn*u!egp1rJ;~wFX&s=5F?XrN5I+h!5A09EWp88 zo>(zqdSg=rMr`GYPv49b$fFuq#tP?h0;fSqp z7Rd#B)d&D!+z$pyBS{^9Gls=-222o7{%~mmM@36vIT(VOIAg&Oc?p-h+`(CXd?)1} zl#;lX*m1)YtELA_VREI)?%v1&37SQZJk2JE$bC>)DOoU%1UZ&^2Cx(T`@tXz2o?0; zCHlOB8B>VD;F4nDBl?zrIl4&VbC}>LpXJhe3u`n%fQ>>0BR~dkF$Nu?6*9eX;CH}L 
zrGd0yTMRs3cm=0BD$me=o+g|WvVkwbuPPYAD~9hzXB8yy5; z6Z$Nr$_f!#F=Q<)!|>QtI2Q{})4}p`0;?OvFa3+hF-v#}Z zgATkHAIw7H^Kw^$83z$r#(T=M zFQ2C3Pm?rdU_PCYD$uJHs^}4jb}PJ~gP$KeQ{VGWj%FHKC@wp5>tfLA2bJllpcp#+ zV9GHxDnS~ElJqo>*G3mq14A6-nB>?(QaBj>0jw}8a@FdO1s(vh#D|iM!5XGvo??6g zb*5*(gQ0Fo$W?4hu`MQq&nsxVF$f5=)FFChv+; zA}E@qWYU`&rdH(Od6(0>(pYBt}yb(X-d?kVkAN2-xnF?1Nu6HXr9ekgY*P(slDuKLaCjJ2iLwMEXC%s$A zUjCGGk`MSbrFS!6`>L)XzuKyxmzBBZJ56NYp-m$XIL6`S&}`?B0#IuM4}KDx`^`wv z>Un5_qXE1~9TkT`?U5*e3-4nlpxk@(hCWcDdHn10)Wxtxdg+h z<9J?fc(VEhv3AR^bjWjtA0nANc(D_N!OX>NVRU0uQR}>WBu%lf)=AG2G`upzW_a!oc5lPiDUxt$}vks-f{_2n3p} z&x{7La@d#f?gVRyvpPY2y5t`3HY+b|s5k_&5Zsw5YH4<+9VCq?0&8#rs?jWRlY|qh zzK!pkufw(Dqv}iR5=Kt~)-N0W?g|{PnmCgtwNJiov~~eRr5qKz-`GP68uvmnx-5%t zCFg*WLPhQ=+o>sLWNS-N_YEXXNx3f*cIQB}hk>&&@@GA-tkf|r`+EkE6K&`3T!2`w z4_u5=D9>#gI!mEno@m>T>%!&7i%T>Q1*dSeEoYsJbigIv%6Dz%?9o~J7(AsJQ(sLy zV@B^F7PXYd`027-0jQvH)V)cYFg1TYdiS43>2M)mK%BzC1UzmVP&W@0Q28srO>paUf}?# zJ{zW<_1 zvb*Qo%`?OHuqfG5M(7;WN(MD4@#Y)Tu{OvetOl^Vs!>=Uq1R9qOcZL^P=Vb3sx#wQ z-@{)XamwMDx&I&HknMoq@KH^seWy5D5q=TGlkcFKgZk`twW~{=5DHE_8$=ST$-gw2 zmkFQY>!U;N-igZ${cu{59Ucp&PaU&*HKvLN=IQ{0{EA=na9TYPbO!@PyI^zF?KKvD z%WD2 z?@^5c6US6k%>$kiy>k$KcpMrZ;0~(xaexCG1I6rMs+8IWiQD>jwo9KPCjQhGMebB&u{PY>Gjd4$tR*QjDbWO#0@ zauxJ6ctOO)Nb)&pU4Bx<3|=(gDM`$R^9Cq=AnNR0mjNwW-duMBX{1wEKSQ`itimlfifGtFQf~4!en6G zT}$+;ylvdl0dQh50VDfuSM{?1tXs^SsfDH;L5u>Sy;Z7v{U6H_g~H zW_@B)tnle2!bIk#4q_fhtt?Tht&C*^ndo7!7Udtr0OYA$;2rz6R!4SGqBTt0@AWls zq9(p%m_MuaGUxH_#x@!2mc47hX@J}l*)6&q6PxtRpTenmkl=gbWIIa|w^P6RR9_@MgkXHI7%cs#^%b)qm7jO3Bmlh6*eSOFLK_&m2chYL;o=4$TeKU+b z5&i%*!LETU8R%qM ze~5`rbNKVJ4l2P-O`A9dRbB5658C2w`jyIbI?d*;>dKU52D@C?;uyB}X!PQyU^!mu zx|QMRawXx8ayDqs&E_MI1gQYyiwiyGN5`9yjo_g9ke3*_LRsyRAajuwi=Vh0a>ONu zh&gi@(>?_0PfkB$h+F(QscQ3wS&Vz;=Mm#QM`FU1x8Fq;Qy{&6mW*EFiOn0W@MiTu znsLy;ryGeQbhy+#)nVtP&i?EUDbMLEh=sH_Vzg5N()U&w0aXn~m!iP&eq24!g(Ev5 zlRs7ESJ8geDan^oC;QU55wXO3Emdpl3+j<1VAKjeK;0CY+c)zi*G#%J%vTNM%>%yz zygn`X5SKQ+hVVUJ!V0k*hd&7O(^B=B&?5O$qR@NUi2C~=d>^aKDhJ?=?107#5WT9h 
zOHF9W?b@^8^61$;>XU27l6#_-*fND&3O9~FnwJG_66qC8n;%$X7q^de+$wmejfBNz2*}Gqz5{v8iU=t8f;rv*X~q7q z(mg_-d^dMEYseq#OOR6YK*(~)z@q=wWY`ySD*{F_ichNhDKlFVZ)ly)6>@y)Bxmgk zIr0^~Ifr7YPe6_<&mq?x<<{c^xvWHuGDy4Y7srA(P0KN-H_S?f?u_fZ+=u-!I{0@#5Gd z^)@2rgYtMeWSLdRIZ{5^4rZqWR8_1>GWqNk8(BA5P1LxiKt<)A*t5^mY_(@;+p3#v zS4SUzIhH?*N&ly@6l&roIyjxE0~jJ970C}TsS&B%+~~&yL=lRphmwv~_*{a?weeom zJ(6yt5i-#rlHChjMcqnfGB|Uvxp2JiO>5>{%LVpQ)UG@c@Tt^RB`9~q8aGH`k}2ZJ zyS4AFTx&PHF@`+Xd+k#g<4%rl3=jMtPw*`a8p{b#UaJK!lXmT3hF}43C+mYg+c(Ib zwGK+$g5r?BC`znHDV?=TKvViC%IFkOkX(xdC!0nIP(PJPh%1YWELmmVI*p^CJ*tEujR`cem8Mj)S|KNqF@35QJLQ*98Pq?Lt zlZWKx=z5>&KkrzhK`ueU`@UJu$fQ4~ovGmG;i7WCH{k*YezT2(L7tkrJ_G9vW$(~; ztJcUMf?Kz$?O93fTdj{KP^7w4&IIBg6#>7zl~`b^P*FW>g%UfH)7n#PoT(nbCnqyw zBLxqj-T>oYUviND&+AVKT1|RoK5V7osAErNO0&UX0wY`Zvx+7Abmg6m4>REhBxJVe zxbbMdA?_6mJ|OJ(+2txfPUbjA_L%V5H^QWn&5=Ng9QTbtU=L>S-%e*kxBGnJK8tYE~CWuCk7$mOui3HwU5l>DZx)~e20qLmqML!UHDNtsbCG-=q&y7RdmE2+byy0Il z2uIwerdv{?4sESR@1~7K2l_h+HjPN@ax3zQzKs)rR>`DWqQD{DPRLXUARLc33t9x^ zl+nP#WNFCnKF-Gi{w_7Ts%)yT@~X$nIr)YQrrNPr)Ye5xeJe*gETg%}F(Yj#Lm1dn ze@phyb`QlV-30jVHwyXNpW5ky`U_SAI46HoA_E2OF^vrVFLhh{46r4e>d3It+WW^r zi!o^3OIglv3qTE8RzFtsuJ1u&Al(kh4V$cYhztU{(N`2aJ9CO} ze&9I*ek90ejNs%*E@=^^t~MAktaDvcMzd^dCt%1XEMy5aBzIzV4t_M%A^{Yc`!Mg(Gt`((*o=gD@17VC`O38Rg*d3= z;H)TIaXSeCT+%CG`m+W}-|^^$ohPJA<)J}YNs?m?E)OEL{7_Fzp#i2CN_oY%E%tj{ zVzd5zb-|+Q3WE&Bn$0_H!O{F?a4#A-$4*rr9v89L4q{A3+^?yPwH7$&+sX<+#26Gi zAJj2k5Xn(bPR1BCL^me=OYkHc$3gJ3$Y;Se?1 zif@1NHRQnM(2WpbUsT1LR{W)QPB*pdd%X(MBavEwCuKs&N8gDkI`AJ6`#;xSFEP=x zCho_W^vF5|d+9<6<(5lw%9J;-qxX*5vWRBfkJRumvDteytEAD=ne)7Qn_p2MGrOXJ zd_@vnQ7$oGdzW4WnfF9N#^@VdzC1EZUfya}_WV-yq<6R@Q!ik@K0W)gcOyzx+ucq9 zbrKm|N_5nXJ!%#K46;&Sr60dA5MDLG(KdpqN{w(aqXk|eRY5-DTvJ*RgQ9$nIG;U_ z$M+AY>%EHo)Zp15`=B@T>q92>*dpayp9`mAVb96oxAV?sqF^dAe?>B%kczR99QLUK z3XHLvtoli4tol(jJs;M%@h5YbaVM0YHyGicH$kTEjxlJJhT?@n1qUslO;-J0)OIxwDa*e+3 zh8Jsf+PSRlbGja=E3KlA=cq$VcYDROi<%YR~#FylhZp>Bb>2-!8^Zvb%`|S@V7=Mpt_Q2puZ|?z(#?cpIzZ#c8k$haQkFzI6&my@h_z->U+jdHFM 
zSa$=l2+sY8c2)Tql1&z7poQ$Yru5DhuBg2a3i3(@l$**I(UMEwODu1*uYAe(C3?2| zUO)zR^H?~Z05fRzah(hYY8Q41kvW|@y%LCCHN-{?rsxt4J)kaMbV?kv_x0!l3qF~a z2!SDwb}T)&k{n61y0nbpgD5dn7YuG69Be1|Wv1+(d zW8VKz1iGm2O+x0SFSa73Sa>3C03nrluCWl5O2#T!JGZaajq`oL`-$3~HJPbj`()k% zpL_MM%Q8%3RXu$!W2T5ZM(ih1kp=k_vLezPvzh(t-hO-wU(rZxlw5gIhmvVhgHcwH zI8m}_P+Y&?k;LC|h5D`c@i7SSr;7xLipx0q&uII@#R6o@k@D0k)|ju6iW>7x!!EtS zna}8DnhRBRX@rq5fyZFzUD@t1qnI29$!tRycYI)L?d`|KXr=4=|Lh?RfpPD*kqXFb z&JIzs=&55{8$c@NMZZ;MJV#A`YF#)bc_T`xE(=*y}O{+l9Ip31RC(}Tr z!pLY6S~5p#RqGUD4tr>wuMI`{wvsxvg-%z&T&wAhA27Ks;FG-bf9?0p9_>A{NL3yp!63{mNrbv@kYu7Jp@g8IQ3I4Y@sLO1z64lkG5=j;XstHe&=_1hp^?$Z;66reMTO??w23c|cefPJ?8#` z8*o*u>o0^(HHXC$Nr(+^3ni;Hrk~2uMqR09sZnBwUktYjtL4a3aY8a47`c`b5U#IK zNz||3Dv5k{&P3;nTkJvU{5+M#0mao7gA$Ruf;eOY*w8%_=3G!{yRSt;6v|nnz{iF6 zJ#dp3G!3ldfivp{zPnS=fg0{hLOF1N^uFx09jQQ|0Zo#n#YLN%r!&#{@gjrTuVTUA z{z7K!)+P2i)8;AXPh#sG*os1R5)2teEf{P+pXjIFpkV{~hx6YAuW8hGDlnMQe?8FE%* zBt;_s(dI2-KVOe~&1G9GXlo!2BpFT1VZApGZpvexdBZXR*pUj|59h)vbci7q!E4qo z#kQ>Be+ZcK+l*zwja2d1Y2%irc=>^D5<2Ctz$V%UgjVq;6d6JX)7xwE%VaB1{7PdJ1lG;oEbAW&(pgD#VS-D95=_HC;TV#bcaqySelF$#1R7tyzKKI@Wf*1jx?P zj+aG=3OEgeDZa*UpH%2>M&WzE8c#qpi^+p>c>wjM9at>brCP8J~))T4Y24T8-sNdP$Ryq$rf$Ua+oLgQ-rGN*c}gqY=>E(X-0>}O3@vX zVPaojB|PE$HRaniGh=3Kj`U44Ca;)8++|Y zDjyD=aCYgsp%tfidA*Pb_h5vzd=n=hK6OiDAD-+u4qY87{y`ip!BT%ed0QAEc%w;&)N!>!0RTXH9qPTe|HWjdwcc+>>N@dd{G>@8>E){;0D zneQT~ud{53=uYCKN0Yyq+!zVwXx0qSXxfERnKg(MiEuO%#6ZZC2u4|fl7u;2%ebAc}H=Gs0IVN8xOW4n%R=)Gm6VH>Ox4LHw+&V~qR&jM$w= z<+Yts!my6UXRy57*RqDty~q*BgvVbOU6-UQy~1_(A{_;2M=RYCbGHVZyvsF>52sa; zn(>{cNJDLaifHE=U#N{wg;E(7I9hL{qEY$rBp2Y0{6R;cxVszZSq5IV3>i~9ZXs1@ zx$4f-<9Q)>vp7LyKV#0{S?ycLZt6qHT@mQ?qR z=}ff!NKCvftIRM*;hL{1bKC%5FT{bRL#Y@htag8j|65Ib`Nm&w5S3|^u1}$Zm5CyG zCY%N<^f#jggUg6EAf%xBEG>nW<^VGSG>)`{-~-p)Z*L>iJ~0&5fZAx{ z*lq6OxYjIJ7`GfT;2py(0yhKOB57yA@4DQ-vXAO)WlA84bh9X+q2j$(Gp-EDJEipS zw%8-`m-s%n#tD>=%jXKHj(L#F^c_Rm+xL&JuKBy-81XYjz=Dj$2gn7`$4x{s`?h_# z=!{DnoK+Anf74W>r7?=9!tj{KC zI>qL!Fz&V*#8^AyQ~Gh+N+NxH!o4#a#@Y@m8kFU5E9*Wvj{~-AUj8(d27gR 
z&kt5W+F$;MpVk)J%o=8WZWQgSIm!my2;FyJX6`w4H!l^w?Ib>BJwI#{Sp>kGy?8+9 zeVo^_A`CX%jrPOUVFao`Z;)p4j9teQf~`S#n*=u_<}TT5QiuEy8|*%21zj|Wn+5|m ztoH!LF}I_p04<{Pb}DV`o3bQY$6-ga)LcE}1TxHY^mGDFMsMKzK!B=%HK{jSh#PuLhi z=tYHxM`3+_i4Q&07g96c%WBCHGN6{%+|Z4t1|J(2Yd*c?k~y9&QQYWMl|j_WlZA&R z62Xw9AY#dnd8m?4ndkk-g(SL>0_3m_@liEYjW+*H4karcb0e|j9}z@01J`cew(cYy zEg@Q5fThpG4QR@UJz?>f+1<^Mj^eCYif7CNgs7F)vIJr_UHFOX7NQVqE37~4#M14= zkv^!)mkAV`RkXm{*`S|e#d9o_Q{FuR3oM9x{%*ZH++syIt|CfoNZuAJyfY>rf@!E2 zXCexi6s-<3cG65Dh0NsR;k#?IeNu+FP*7Ln5PnFaQys3tfag{{^mo4oIJp|cF40Ld zfBcZ5d^!f{`UmHW^-zRMxBj+d%73tJcr*)!KxYQq}``+?cjEP-*MgB3*bAkk7RXok5 z2paAziM*R8`}vAOIJK*f(DWzwq#^)+NF#qO=z5s74_%q2WgyH6h8BB^bX0^D)bClq zQ?u3~^)0678d!A~`Q$x*o3=>sjfXXI$f}S9k2O@0cO(!ZWE6{h@(&|^+=)m~vXXvw2R*xG%sYI`?kTQOlg&`xBBnMj_v+N_^R-D)Wp+t!sn?3n3~&eyl4^E8VVW}+3j=R1f3MSeell!<)Q(K;416r{LAAr31B+M15ic)EXke#HTNC`oG_DC zMWY+L$GPxdlUH_&z$S5g^s)aKc@tnCIsdObfJlXv-yytb0q^XEYiRIYP_}_O??H{} z2FbU<0q~5iWcS&e1Gq0T7IJeigyRS;wmdg549o#@_z$|VdwxL+Sin2mD&!6~Y>6M$ z#8@oBsYQ(;pa+^JJ5H7DyIzbau(QfBaZ7aj8?VOzLMB!i^JuPak88C}af3x54K~m; z8yyT-I7qg0$f<}%IcTaw?$e8%6Y^sxSm`OO;a%KZrW**l9TM0RabJpDL7!{?Xx&nb zsM(m36^_Hv4&=U_!3mDIDim>@RU*8*eY~|=<;Dk_Q7tdrfO~&jfj+ZQsLhZ%vuBR8 z%Izhn6P#y})#GA;L#@1I93{(Ni>>}KU);_Nhcum~rRDu^CwozHwVZKw*+*TUY0pll zs{!#IKY~zv(VuL?c3STF0P!h^dY9>C(0G4jkAC#CO8?_+pl=Z0s```J;P|IsX1Wcs z9m%cNJ0SZaVPNuZd}$c3uB+}YpHCxevST=M6Y$y7L_Yo)ps5S+Xl$Kop%0xYtv1ovlM?v~>5_1GQSFfe!-h?Wtv)336a-Y~{ z57HJ);AzE~2?mv;J_T~|XFOD=A?}d{G8cG#v`qs?mVNq{1#*acE-8cXG#Va^MT#cQ zJxBGG7C{9tMOEX|`*6o$W4Y#-$~7${<@SYgox?-w4TpEmv0e+-U)zxLxdxhM=^%j7 zJ!$1SuH2qNz{#ajq||C<87RTm77pRVQMJ5Hq7%+j>p%f>>oIPiTrzz^D_&8Lu1<|d zc^eWm*T7f_;(j*m3tT);GKccu0A%)DhmGKpt8J5#Go*L*RNoxQEPLp$SyY>64p5hv z@Dh@!u3C>}PYHsDU1;nKabJ8QS#nkleF=VFb}sc2(fkz~!X1I7s104)0V9KUuid3m zB&?FfYT`N4`g6~$wMxKU0m!tGTh%exKG;AF6}V2t$X(I5Tg*Ilt0n{FW!MmM5bCs3 zOET;wq^*AsjgEm{Ph?)2Q7x_s=NwSe-lxYG)-FfNMi^Xv(DP~`t9vcsWDYa zsX@UJm+kFJKa)>Q0yYLl0?KDe53h@xq4*3}HSodJ@JoT8rH}c;!`2#_qXK?w31N0b 
zWZQmhJV!)MCc~a3&~!&Jzp?~=TT2dk;e6m)c>-J6{V-UZZaQx&ES-rW-zHLeqN;K( zr=X=WC&QTKJLo`lqRwJI;9~C{wS4deiWQ?NFh`kvu1IT=aG736p;APPc3(6u8ay${ zNt&w=yDpBnQMi>Ph)-jO7VONX>tiH<%;`N`wcEM5*rf?JC6jvJg@hu<8M1~%aT-5P z0OQI*I*Jsx1`J-dE%{em;c7_n+aWx9NOrl}zeUfoYA^PF8rMy(oR#^sJItbbB=n3@ zQ~d;6%$W#?7m6e}Izv_r3PIbcjJN;|6>L|XgK-!(qIt)2q%n4-QM~7jmpjr?;J4k% zS&0)-2Ilo)pRm?B`VS;yZT(`|Xs%QXc=6f>^=i^R&kVjbYtdr!rq7S;0{O63?Dl%>%eE z4TtG=Y}rg2KxoEtln*rO&_3Q)?A5fc#rDo(@3N z9FK=U8H9wMeoW__kK=M!@w~dqu`rT?o&twWVCIG(m1VctrT$_skkVM=nS~)npvK?g z@;d#WG#u3ge!U{!iba<@0+7LpQ!bafb9Go@b%HaBomq#NXxQXnKG1+F()d^eJ<%HkpuHw~(JqiTCnyD16PWAdz4{XNUh3UVjf`-AJ@(I(Vh4k}T z>~2!6eihuaU{v;CT#8BA3>|(C0gooVwD2)uK38uUQ0T1|LR!HpTqlufsvHLkW?;PX zelBQe11MNpSaqc;3Sop@IS;a3NoGjD%)5d*Z6$sgd07XIafAx7`D<{;E^bbR(vN)R zQY}11*f}3u>I9dpHbt^c!TO9Y{;Cnvk3)wr#sq94?$~mdctdx?t$~Psfc0}r)S}!* zI{HMe@cVFfT4_;kiYo;(jFZ65OYDJB?D@i-wILr-lHfpM*jp?%o^xC3Ur{aXn1@~= zLR+`Z5YFSG&DL5@ObtVk|Cdq33V&q?WNsUmC+YM!n_}yh=sGg=+ydfR;4Eq&sLGhF zf>SSy9AKkeB#|9`aGrvX3TlX-Bs4qf7`Sq)F>MA`adhuYht`%_D4fKAHt2BK9CT*y zt`)F5M6Uw;&r!IUwGVo+6Pc%1QV4%*GzWRtYbZy*oDVN(mggelFdMQ>!0?~=MoKnA z8T;!WC|0LSH9rb|X%iFHa@SGQCBLk+tRShw(2CQ_XuLn`q1bO6H{3T|-wY^4F?jN$ zivm{?E|~j2-*1+R?hwTR3e_*(TH=k%ITmKAL>PfgUB~;7@x5S{s_?Y6^Y7-T-Ev$I zxZS=BKYTt=2i9LX|80E~MPZe89|>vjcQO}k14|px(s_*OY)b|FVK=t_!`W#;OTOm0 zQ(Q5q6+c7pCFfiqb_)=_BS-Ik5?7yw-{#(R zM127Q^=fJyOd9aKU7hfPR5fX+!f4D=-4E{otMeyfWWe9mNOt23wuIbOE+_oz{F_fY zbz_b_k|e#bb7LEjyo<;cY&0rTbRr z0X)ID63FAMW7*>!WL$DN;E+`*`?S;E*ksNVp)WQbbtjWGU~7}egSIS7gk}&5eKn>^ ziKyI+#g1@+jM)Btx9XsO^47yd6o9YQ;d1UTGytc}M^BLS6-j*kHRHOQL9n6|-wFxa z({UhwPM@YciNL=>m&Es(T&($u21s;Tw$VMQ?8IETqDD`n2R`V!+^Bb^h{^y-)Sh}X z^1FRB`C118w?EQd2y-pZ@ftUtQ@FZ9@q^9K?<2+t0k6#Ax-g+y8qyrdZ=%2tK;=1dG4U!zs_04Kz@ZQs)11?sVjmbFKrqrIJ`lM>xP#JU$4y2JcB7 z$&-(dYD@uL%gDD*W&QrJ-WnA9Eg*Z1M~~I&|HGxQns8+>kU696(Y{b*EfzG14Xc6P%ucMNZn;p=_%hvI>51e9aknK!p0&Ok5Q>bqx zx4Dwd^Q@0U<>&{q1|ROYMp$`}F|3}%yeZ~As+aoHjy;lv0PgwDZ_krL=&q*vFMAZc ztc%M^$n|tx(AjQx1Fx$c{G4nNpKbKgHd-z9qw@_@yIRP0M#vt5;_>#7e!%S>z 
z9}DL_UXw3u*w}ruwG>QF>?l%z+vhRk=RrqY@jj&|G)uXp?h^v}ipHsCaL-1CeNIVd zl+{to06qXn&WvbG1`%(>R&hpi4>V1%Yr$%Ba^B5&K{Rv?PrVIwwRmS8wX3QU z3YFEoyjjBYrLH_0dtQP9o8E1(tSKECYYdb3Ow`{ZM;{jSUVqu^frG>d&W#><%h81W zC^5&y8!7J`NX)Eoel?z~$228$xklZtaUlz0MyH}~5D(c@24{jE3NFOBHi^SHjX)Yy zAwfF3vAhqLb_my%0|DTAK5+Ln@x4c~b9jC;THWu-`Xqu!m=l`a%>YDgrv!6h=G1%(t3h zVzzcg5fcR>h0P_#l}x~x?M|_P>!D+!kzP63cL+dL1^I~#Tw?frqD@EQkiT-T>~_5C ztp-)sy703jq~I+(s&b5hDAS~1$y2G;-42C37FvUJ z4s9X?s5vzZHROstHswB=nohLyUMuP1z4YfUh zO&t_(a?0bK!IVG>0%auJBdwnJmLbD%Lh2NdYXR8pMhAgYoAq$u;Tcp_>}z z31R?$C2vfbrFO=|;uW019(&^`-!yjx`>_(mJ7U*#+KTHY9Gpy>65GSyJVBEK=K?yh z0f4W;ntcv!HSr<0hI0MWAB11?;R<9W6la|w-3A8fRT zXca&wN-LaPfBmwp5W$w-u2mBGC1@STS!C9(wg2u4=?BMOS4WCC+7(W|1lM3H5MFj~ zKV^11pJ{g||4e+m?B(O?6XbPwuDmDX9mC-7BtM2KTTV`2xS{=$tT=n6@A&E1gJD8& zXAh=WZe3wFWl;r5U&cEHTP2x=5)H)--# zE`0n0dz5>OADfaQtn~q#_~xP&|KoCRc9(d4KhQuS+S8YNAm$a~L(d=#D}pQaT2ydu zl`k|P8t9ePp+;H3>{nU%=y#+A&qBe9ho)ZyeCN6W~k$u_LAwIeVqkTSU+o%@9 zLigRFocGYk7#`wf?Nr?-v&GFt@k^AGe?{cCwHxS5B4|{vh#DhDV9@YJIMCS$5+S6-Ip-iq>Cl`Zjtm_PIL>wkcyTr(nlg8(-Q=@V^~#L<9QoAU93;e{4@$0o+?ZY zL?U!S+ms4T^xfqcZy{u6DI*jRs-@V^u|bE}?0lv-vAP9?u558JJ+^)Tp4hA{euT;a z3}q_!t#*|k4IPjwDhd4jE(p%!hcbjG$0Ad%Aj8#VM&?uYFcbT}!V*BIFhCEb^>t}& zlR?kOymhxK|7+Z4}td9&pkd82qJP<+sUJEiB5B`W_m7A4FP0dr=M=Q}b-Zta z_YcIS=*KEn20>gsq@^*S0SO!IE25FNQ0k7(cYq zlWy2izl==h1EANu=jqXDuh)xU-h_4c7q4j!?+DJU?Av-ox8zmQ7+xl zoxG%Eh`aR))zEmPAy<(Y(2v(IFE^fypnLP4)W_}3rk8;BhykFelT)(nF18NjyHGq8 zj9?6E%-k;@ixt4qiYgCi4sVss}`ZC7EmM^UA0`TDPqm++|Q|B0t)XY&g3u4KN0ibxM#y4w5Gkw)&3!nZV zt%v|!2AFv;p5>Q)hO(&Qk+bGi6-NUdBgRqfz@Lf}L@D>eV#Ij<+rGRwPGF9-%~`W2 zPkBXiA6)KFy-amwWZ(qQYM?fMaGmpaDDu6Kl~M6xWTE(!c0K6UUBln+`nHklLHPX*@&n~2w@v1sMJX{SWi2FcL36fBRK-M) z3Km{*ahfb5#P~Im|0V2wxt-0nkUWELpO0h3P;;(8T zv#C&|DNZK9pgBou88kJs1tTT=`KSXE4v+f~SZzIEgx0Z088z$Z{Wm`}EAZ^Okwar0}@hO0kA!$*xLebK0*r7R+Cl z%-gprh`WSt9lGxoYrwgtUU5LbdM3-9DLi7}z#;uWVCam#epv}Y zwFJWAP{Ec4xGhVA?`3R|4MFJg-MmF@!&XGCR)@d^%R+IY1z5v?TVYzkSvcn14J$Wc zPOgUkUuxn2UqGP017uLc_kjN&64zblqf20?3KFU~cGFRw{>mQ(Pf23K;?_<6cFIOx 
z@tL^*lX5VvN+E$T6(oi#X!`xIPtr!E{i!5o+CGk6JjO~r=&}U-fudXtoO2%F3^lJ!HW+Vqxbc<( zsbg!vUL#sZhq-bBG7p{Q?$CWJ^jo>PRqiqk;Jv#(_c<$R0hSqla^#`#;}ZGnmv;w1 zb`Z#j-)@5>Vg9h;^pu4ssX$$zO7*u9Nx-A@#N9o~qAKIR7QkN^LI?C8d|K7wmSKr-v-8!A4=wQS!NUNuwZ5=+{;=$8=8 z{6b1dj4;f1y&e7RP(#JLOOr4d*O-OjNtH~kUoRVH*-SSl#FHXGoq2R_w1f~uwfrWw zgixtYb&pKnx=7&0_4v=&2C-_YrLhsM2begTgMd&XIa&U}T)($S(4SBdtd@TW!arz$ zKsXJEmf zW(RB?iYA3r3nmqb` zlHxgqc=(2Xj;p&sCgs#$BWby#13N|U_-fG$SoC^$N#T006cQnCw@`5>f>xks%P+aD zD2~a<@aitq89}oop6<7aF$Nq$znm`t<|K`1WrVD0sY_@zAp8ah3uiFu9_m;d7hcD= zRpCHL3mEi{NyC_z__{(L78x7r3SJA&d|l67ik!_3@hY}J>rt1JtiFrmSsGwb15*hq9}%gV&PcPm)XsVQost6tqwst#KHA+-%GCat|K>J z7x!g-p<28&W8hl=N(z>)dRoHh63b@+lI=jV$d;i^fkMg3n~xAbq55}O&zS&t`88AX zwC%5|L0`BzS|Z$?c1+7X>U7yetBo*V6m^BM7&B~~%qq?wU(CWI#xvyh6fk~9TUaUU z#d-O?$AJ^d`4&w48D2`THn7RE>HJ5-n-O1VBi!!EV=JqWwq2{Z&B~kze%2w8 zEXB!GSF2$ZIJ(@?>(E?|q|2jae#1@VSA5*aQ>l64%yhk4Et}$s$b6=MOlY^!9+;`X zT?K)+E>WECA3-f!z`*Z2OzhG8YJlR{VW$i-t+4A{XuiCG)_4ublWEktl}bmUqG>F~ zw7XN(f*6?(zFO8)Ee!o-!jrp)p@2teU3d6rkNHPLhNN)@J=>-p_tgALmjF8v+XfAO zH%Nosh6`yDoVB7ZzWodE#?~+G3%3?ikG_$V6I52v*kr|~lvt{&A%mCfPg`Zr62)pE z0eI4`%c4u(7D%i(pV}q-ni04q#(H9as6kld__SqIv?l;%}r>G5FRmTWjd?_E!Qlu(maL_{9jAhAc!z z>hd=xKGzBK!qO8JWs84i4yfYjTk+nxo zlgM7ODW&L8g{5yFl#UD-3*GgR91h!jr&=@@z#h0AEU9#t2TmG0;%x6dMQUqGy`R|^w1RhyZ_zM zSWrjhoh+ObI%*Le*-QC8-oEZ3s6jp`^MV<&Kp);VxP~#*YzkrFnyHX5V~NZcVUn(k z!t9*38Ebfu4)Cg2FLeROcXfKZj-O#DQYTV0f69sWM^q72o+d;cg&a)vDISE-<+}ev zhF&p1ye!;a4KM4@9MB3c`x(>=rXn1WLi0z4aFaFTY5O|hldi}_&n)uvaJWaR zTm-FCf}WgpBi8OSr2b%520wI6)jKDsTu~h`RHbHemVzXI_+i#Cf#!h~_dcJ}B@PrB z*#*|@s9mG*8Mm_?0t5FK<{@>&z^{^exMG2vUDh)0ThbSdU=xuY=vIq{7heD5{2Dze z2U4qlxh!=Cbo9CtsSEOy+_MC-z<^hU-qph)uRW%l2T0-(FG;*v7XC2rQ5$nf0VHU0 zUzc@Et}HY}NjZtvZQH^wFm=w(jt|d**{(j@u*^iPANG>^W&Yi!ivXMVJhn9rf@Oj| zOvh{*g*{~}!pp7eiarNP!r!r5;B1+%vI>>zL+Ey0=QMsWvJb$Gk1q`f*FK zWsOKmcd*mTqVJv7n-a!Tl=jpXasL*0rla#$1R+fhYoyEX3Ff;tTI2w zKmWFkI%72sT&93*`iFeMPU z)R3kiO_XE4VJ4%4clM(?o;=^10#`y3NL$dC4=FcA<2wc~yu6kGU%7JE4Ed&R?7UOP 
zK!7lT+SZ460{AD(WR>Yec4V*?W56_`vv|R(s~F?PWXUw!xqt&cNGh>1PQ99FtpF!1 zO;-A@4EH5X8c*rxq@cnfYmp|Z=9r!#S5SX1rejN0stm-f;7kqi9T!aDB7s$SM@VsbIol#{EoGkUlxt=nK`90s+2-C6Vxt^XZ z7LablWd^|f0}lAWFa92YV+~ROrd4wM8-JckKoQ#SIvq z)SntIB4rGW#|LDxsvuh$=P}hC&VZX<+{(su(dxSZ*!h9=cY4%1kU2zn8N5c$r>w54 zVVt=6x?8)HrOL%xqqmn{k!F@_lt~iN_?Ae|6v4%(o}8>q+k8Nm@(7ZI{kyY{A_oz1 zxFA=qWZuyOueI`AMLU&j{!^c!iyqOGngUZA42?D(N zP&34=;5aC2S^Zl3Q_x|x6kZ6DA>4Yc)P}6#$`nA#fr{*14ZBpLEHwxW8k^s@n;RfF zDbY}G0yYmWH=KG~11b}R#3oYw4Vo~nnCjSi={I^xzMfLOu>bHdYVsyVioS{omkWlp z6cjzACIUwvnruqAT$+oyGaH#Hq#c}?zghHu!We4<&mNY3v4?RS2{0k`hkrViPey$) zh5R3`tkL_^##q0im=C^hRMa{Sklxm@%Bs!sDatJn>kn!VPwCgG_%6rFOAkv;u>DJI z8*Jp^Pl?fG9ik;P)Bbj(f`0X685Il&3Z#=+@S#;@lx7)jsvdGKj~tD8)YQ7IX#D%h zBwL{1uB!~FBf066cmSR~kl=-QjYc=pJSk)zbJZFpX1@&>aXal3+}pKUy5PeKu64Lj zAoBeOw$|iJWTRz=IGJjNCT?A>V#)?Vn&!vl$X&eN52v~Rq~+&ynGNZYkQ%`xT&#mh z*d5)fvsS7vf6W*T_7(F-0nFc(9Lkj|jibZIK)Oea@WG0aEmzTRidw4LAOl_prF)qY zc1+2(l~Dl+LH<7wcCdrH#lYREZoY&AvLS~r3=YK>>|NHOM zf5pl&Pa1-ly6YvZM{^ORf3k2HtS(S^wA(={KyU(3k6fz@9q!grTG;GGpfUVBk6M`4 zi@JmIN?e3q$kBq08*<)s!H$mZR{#@WJ(8Eh3|&LC;5vHTF-|OWU}|{!bSp^MtIn=7 z03rf+b!;7Rf0@jc}@Vc+~fI z>hhhAmaihBsd@qlbc9VhJR_i*H9SsC5H0osB@~S9-*JWx!mCzkHchF#pz-t|(j8ad>}QI zOy|WQ!t?T3xW@C6Zt7<)x{lGNM-{^AFYC|7GS|$PZbIqZi~rG-W3(`1p1p$TY*sv3 z|G6bVR6m^KJI_xX)nbYBPmrLC<;bQTeF*dOCwypj1JhJQHONlYo1(GcXaJfn1Za#y zT#6NMAW8KUq!J?oFg$vW$@UYOK;P(!Mt^$>Na5MD6157K#Wjy=P4jiAsd0Ejd3Fea zY;9P)q46Hd6=Q0twWyc0v|&(uOXCtVFuqtm`N_tU$@1l2H6!jvC|}!D1>~FOg+%`EE26q^^6v`7LFyb)=2W zd*_)8yQ?^eH5ZsE06wXZP+H=CETvxmvPEX#@lUjXgU>^LTVH%g3w?>0Hq>IEb4we$ z9b~L>%cQuj%Y(6fXY`V_zU%CgXrgXmbco{O+hg}KRQN<}9WStZb!A3j7|5ji!z zi#V7Q?Am|jTY-p@l1JKir9y>%a7YSeCXT4(Ha}>V2`KFCLODoB1e`H6=iRQ7pNmwS zTKYBo&%r&}8ASNwnvbTpny! 
zo*I+@)PS;&X|-ftm89VOSeeHd0s}l=o)GJZ9>uglrkoR3mX5cc`?3De zpBmA}Bx^`8&6EGx12S1;Z$KX0=MeF-Iu6mpF(dIwWC+J=t?M-5Y7GMTwQPt?xlfhe zcL+3El!#I+M_3n6dU6B1dti>D!I2V_b=VhbhLLq>7G&-*)``-+*kP^f2UE$y5ao7D z$t=9VFy`mls~#@eSh%<~-6KkpYFB&5T;Mfg^tv_)mjV6}3g2fa-6uH;b<>5cFu-M@6Rb(4fYPMT@SO|JMy zQ*`VY-`1J%xCrIumiR+RC0l>5!mr697sae*nC0fPzR=iA#!#HfN_9AS`~ zt=Y9qkT8=FMO+fIMPs;d_<5hl$%wPBjW;9u!YuHs0i;MWf`Do1ZY2I+&cg;#>o&mk zx3EJt2g9V+(4a1)dKMgWfLh1gvQZ3|iny$4Jjh{h!mfW61etSg+=P=s(^1nBwQ5>v zwL*<^k;D;AqNRQkrIzqR`~KfY2_DU_54aH=m0 z&r6^Rj*Aviee}Sh#xv$(pf-XikAf&lgD8u`MCq!0n9wmkn0ejv4NuMZZqQwk8cB~! z$632R3I^P06guBP0prBLY0aRSM%7iwKhogcSpI26Ca#4fWMMt`Y90$O+A%E}P0%NY z2I=(!p0T zps-Uh$~pUTA9-ErY+;C|D}V1xe`kTw0LI!n=D+TF`*<@H&1S%5s>?$*&{c(0Yjh?_ zR1pOZwCe+&U3`ft5MS!0$f`*V*Wli!r#HOs1eYID>67+i@FhZ92LhKc_?!Z^qmAXb z{8}bNA1<{wLj`nB*DoD~Wq@~1MtMg=u%ZYj2Q3l@a*b5zqY*+v`dD7zi~M(KIR(pF z`WLG=oK_>@oQD#uaOQ3f<+*^8F(a-p;Ao$d?mfa@tLoh~wjBpU$d!46YfrqT4ZA!1 z9}+UNF}X`cS^!bTx!fhWT!zB_a~s2>Q08Z8qsWa^Q!0dFsI#?xqmzpE_!z&xn&@2> z%`8eJ@A4k8vKqJKAx!4k^cDo4(W!Ky|EW`~T;8?Z(RbTGuo8;$f3x8h>-Su6t&hqZ zpVN9Q%zzsffv)#f7tQ}Kz(#+g?CS-@H%06oNrV4>G!}r+g)GLn6tY)F6nhK*jV{dC z(IgnfOhpR*K!zQXVsBcTbr zBwkXW$1s09G*P2kk@pT2JppU-E%!I#o)2Aj(p?l?;F^F8)Up?9opWqn8`_uE50d;0 z1c!m#rWD|T_&fz=L0b}eQUXL$H8NiW3dOVf_O``FQyUHgLt^u*-Pjga955+mF6$V2 zSy6I40Sy42d4YDU?XjmEuT8JxVA&LqE3lbD@|?gd70H}L_-cX!rwhhf^)<^IC%Eos zSzqsNr|F8)OJlEMlj{6#n4i>&M)Wv)GS^=Uw=Om4^~3j!`^9z)QOReo48=u`g+*YQ z%)>FRT->^KW_LBX;>a&}5hd~kyepZfldhnJ9)0o8nXIM8K+UQj4|nZ-wVBA)3r@0T zHs=AMd56WK*K8|eYca^a<}4G!6#`ScWJYcSL6YP#S!_HruuV#X?cLU12-r%E^*JZ1 zGxII~aM=s$P)`8?73E+fOo+6mJuz;LSi>C9Ju*cI^oJ=^wu=b;oMN0doxNmmC(*HG z(9{3oz+2s8Ky!+v{-ju)EmNAR*9aGhtCxh~*p=I=7d5KyeLx$=)6ZAFmd11=h|$q* zrgVHZnQqoLdCfqnVe;)TnH`}@uR--zoa|gr@WrnR*i z?S3QeC5GraLb9?dC*jj|X$}4;T8?AENj;>jh(7yQ$eq$#?~DBzr7p{Q;@+Ij4bWd( z+HdSQMw#w{VcH;&*Eu-SD?J3St%Wp(J?RaTjqV8Ssj5+x?rtw=sztp6fMSy56@G2Y zOvT@`f!Js8cu~C>(Yo6}st;@;G_`p#+>~9a!7q4?)oC zx!f*2SJH;^(3G9n6g^si`pqaJ0x{)u6xkX-d@!1=pcfMq^>muYrg+w!BBnzWXZ=w+ 
zCaxOK;wvNmPZPt)z7E8w5`(l!LQy64o&#jz(2~l%a+RXCeNAn40x}ClCz5;{RywY* zzkiZOg7E_+?A+vm5`OHL1+i$ID1shj3=I{IGy@zxjs7k@&&L0*!)BAW82}Pg>`;r< z*^O=s!`)hK>yiyI^ngRs2`>?a<(MX3kfXiC4?UMbP8>0egxWk|Da%7AnPRWL{dly$ zeS3k=PpbY;!ewkKD$G@%A%toUn~e|u{Uw}IqcYPau`R95L-C?T8=DN5LF#Y3K z&NhT|puh~oR>lZre|%$i{UTIbFEFJTVg7TRYf4Hd8BRefK9X9CvLHm29Z=bWFd_!W z&AGPbUDH2ZnleXslK#e0w2lL+EfM@3k)@u!q>02ogirz|qhOsYh*RcxQuET=l^yFqp zP5dJ?j0M;LSzr&SOkPeeNCa`FlfGKR&bKW<2`oJEK3l5dLp~4GfEnMY<`lVUqmOr3 zFC6X8WiHzWf^1J9TjEX)O-6p197qEZK@dG2*-(Yh`f$fZ%hX*LE&-L(wXKtKh0NK( z|EPV>`8s}+XH9StA?5wVZ&3r`1pa>)iySqRhOFd~X6I$aa;;?H0<8|FOmHlUF9N+H zAObi6zbYgFh_Y_uP>|Sn{HYXvp$|$T@6%&OwS@`m_=-o=YsL|B<*WwVZwc}Fw1DwM zX&vgM$en*CXa zb*h`1)0t?2c~dt38o!K%A)AtB`FJ+XaGUIO*OuYbkqk^Bf@#4VWt&@Pnc>Zy+XuS- z!H&cIU9Z94ptj5DncDixwTLB#$ut}tq7T?f2h@xE7l!A$UzBn|CU>j~rXh=#fn`tB zMz)l)g;=)mu#2Nv>o1Zq>w}8xVIl$qc-jnbVK)R)YYED%vth@JIx}EBg=Tb8^9s^y zulhHQ#t|v)OoN|TsY$6Iry#N_3vD4uC-$PNu%L(Fo1lv^|9N~VTcLf1K zV{gd65jjGK^#h^QzM!!y`GoUXY;NnQj&*S%UE!?95>#``O3XN)(csNWL7Jmav4L|N zN!L8v&?}}(P(TD%nEXQ_d zWZKiyjWG=UJ!2@U2p;%Tr^nF>(a&)KM-)bvIQ;22bl`| zQO@+=ixXaOwej%lEMPR_`idSfO%B1SO0s%IXdfTJc`2biHI&2ROTGyceJ0g4D3=@vg6AL)w7l>r0x_Mx z!ZkslB@=|Qb{=wc@HwpyYl{&EVLURS%XiBnFu8V2C^s<;Xvg%=s^rG!6v1Nt8#V94u{5AhG@g%@V8K3E zfMIya9K4d@GdxHNy0!SK*tn>@ebYLwUt@62cpbMi?vNb^DiYE*esC>5x^f(sG#yLH zbf3Je8Yb6gHMt`zW6{DcR)A4%4wZ}ze1Zfkr{w?W$7(_F4w!y4%GD9-UdKRsM@Eh> zVe)d??JCm_{(bHZS^cu@+UiA<3cEVz>j>gry4k*fLK*S()`_4Xxy#9lh^_uFzW~yJ zpeS>*51(fq=M_vIJHQHDm@1CZSpERnm+5 zRh4aMJ3>Z7Q%-Mc)R^bj0O^$4j4>5gtzB>~b|?n2on}X3Hj~UrhOMZ)OofBiBBbrW z`24}k%q;3gi79c-56Hmu3sb;Q}yWn8;4DCZ5(fH-?VIs|6`f63fIxP+wpsVJ_MJFn80f z5S{iAH`HQgGJ&#R9a+TviiejogppYm^to-@tPw(58mn&8{ne=5lGYS;HS($)=aoVg zW`oBhzu)QBA`WqHy#dk-!Dq7_tW3(@h&TzPPLUp8^fa_QJ!Qa?MVB46=u)|wT9lR8zAZMCH0gaYd z3O#pgCL9ygx4p6v?JZS!9 zfj+@oa5LwE&W2t~5c{1qh09k3ElwA^EGxt(9SNLRglWTo|d=GxW z!S`-k(PhpEQ`+`%P0l#`7zH(v^;qaO9Gj+kb}B2TvF@K}DSpN8($wNF ziPHa`mlN!`*}W)*tJqUULsqV|l%UrvGgQqIfCN>HE@-u{0 
zHJ2{w+2Fc#AG-A~bzNN18xvE}S8cfJt&9nu;}Z$#$rPfm46Sd@a9kj0h6y3N6aLK- zOOrqNTxJCD4()TVO_kK=jOJ?H3e^l^k=po%5dP2ry}y26+UlLTp4feI^_6X6yqyKu`m*BzE`0om7+^|d z6&k@N*LBi;ijGP>PVGuvfJklRddAx48R6N8KdHf&@m{dqa!osOB#&Iz7K5x(zev{+ zj}s&OnOlc{dlxMI`1zO`>4!#oRSEOoPB8<%Zp3q z3gdBy=Z(|zCt`4XJYatuLk}j=RLoY;@`}1#@SUN4ViWiKK({0Ni}Gf!#1FJc6Ubth zZdIYc*JFY@eiJ1j9W{>(a+Nb zKkL;u)npgL$~8R6sYxJm zS-Ni*@NSj0%Yxhf5aMMCTD}7N!~w?$zA%CGbbp-hLoEw$j@gcxVLJ$BOW+eglsGx0 zG3xYRe&CY=++_t$js=YmL@-B0)CR!7z`(%3zyK~IuvZ+N#zqKPBr!t8nqGDkXjSgP z^k@%P^gs3i{1?Bx4?iYJuI$Y%L{FUXXM8c28K8iNVWkdg4#g0c-H1+GSLq<9QBFu3 ztg>%)smn^Hx7XTU_3DiA8D(wnA7N**45W+>TlMOns=5*G@vG$6jeD^R9Q!p2=SOZJ zb4_-^LS=llO_vUuKP6=b|MNBJO=*N>mST{vXN&!`BMlJ@1Fh?W&k_oC_ixapGezQ7 z+UvLH(m_&AIvLWSM%uDHwX#az>sN6s2Dhp>*u=mdzd4mLGQ{%tcyf^<+3qc6n^eZ{ zSjlDlr%lq9kR@4WRUXU2S5sLLEPu90CFBMd=UA;Nl+ud*GAyeC>1>e7hYS&c|L@)& zl{7NMO33yz%4P`7Bvm%@^0hjfR<2OsSD&&gL$J$|aDyow^i%$0$ok#pln#P&&gO~4 zYn-&nr>}D4GhGYwTbxqLDEhP<__fp4vs*I7!B+p?;FK~lL^iCEK73O;NKz`BAq|GWr-QaUI;pER=+!+f93;gg)PG=$PY4If5ka4kHCvP}uA zv&ABszqt)Kc5SlG{$_~CbXl_cf3!u}RkDNr>eX+e@nM13&tdwx6^qhGj)Kn%TYVCOU03%33)Skyl0- zWAi8-^fkt3NCS-T^=vdo+8j|$o6R&K9TX$1ks*``#%_nQ92myhWRwn~u*S!e2VZ@( zX?*VXRv#O|FCVs!(m^n~WJRV7Dr5E-+R{PN(ex)B3pw|vhK{{FC0DCOh^6O!dbtcJZBm`d+9 zp>)uDly(N+zb2t{(1bHic7S;PTC9?Kb*ZL4Zw)JDibb+Aoa`=e{93<3C>_+bsWT+L zf3=z7@}B-jrbxU;<7-uVbcV!QBdihnlMaH@|M7(OoA1d986wi@OblSxHlK8moNs2l zey>TN9+~1+SvFk@m-;;L^a@*il19E{yjtpGF(wOte644E$e9M!-<>x2WH(DdF1x_* z>um2y2l<=-44I%4&h{T2yHUozNrKM;=M{8$tNH+?TQ{}?guy)w@N zd&3$V{pcVHAH1NhmG!;Nht&C!!O+|be+hx;KMq3VyRGed;cP3h==gO&8F%uuza7heU0x~V#7*it*zErE2Pb9 zq?9F=&vn@fMNPMYU|6X)`Vu}jkg*c*{$rcf;ldHdWu?kaa8s||WQ`7bS|#=PI`(X_ zO2?`Rqq+><|F^EuL0&z>vK%;iO09*3<-l{&Z?ML=nd}vTRnqnKiz z9tnG|`VThg@A>?Wbhha6Cqo*duU=t%ueC=9!H;wDq_QlJ4yw<wn(FcXhZmzBEu|NRv#*xY=pnNHI0*UMB;&01XBv@ zY>>h#Ir6kU8XYvB%VtOe>TV#64ubKrI2n#FX{$3jXf}=1P8!q)^ADTL_aa*^w{l&t zPj;iz{c*+;ox(?7w>cwihFE0FCg-blA-eZ7qyfa>^q#FJY{?H0wqJuYIw-oV?^ad) zj&Zg(qk~#LoD7*d&ueV&6EelbeEM3I(IdP*~{Ms9o(I-PH 
z(R=K2HXa9V*bdv1@nJM;j1Hf;8 zvQZc#QUbD_S z!f?w;Ti02iCl)&0VS_Qo&t)4ZtZY_wyYm_=PS()Iakv%%$(DvN1{*L5>A&e>!~2g&O2IdY-y zbIR9ZM+f!Smu$JXgx?kcu@c@?Q|D3) z?2p1mVazhiqFP+mfh*g2;A!9}V{Fr_WXUC}O}|MP9aLwf^}4ggTIpj6CevO`4cHsL z)=)pgwa9t#p&#DxmaZ#IEt{T%T5^13153KI%qzPkRg$l z@@_8cHn6O-{d|WXjmC`!AH~HdorikE*V;y)t>Mg$d&r)|&LE2|~Yz<*g%q3FF83v+PSy^=#IMOIz zdoMcZ{ezVw7>Td_+CQ93xmk8Adl|6omaR@H{e3QN{<1b>9xgUjS3*%1eBJ0n8Do4; z-Q~YaI6V^_<2U*B`2nK38_(oX+TwRthBQ?8U(eRx4wAL@^F+e=*&Q4Fb`Wj(lO-2> zG1tYQ>;%J}61Mm4pq5WkzEl;2ql~keDh!QJ)$5(E%SO0H*=mjG_hPL+>4SSs z9V@|@3ZG<1g9yIYW^Flna{0*r+y{Q~o2>01k1tMcAaf^;y)w@$g)P?h=LwbayS9Tm z(&$z-Y1}Kr|6+rcz85*upi)X>d$k=zJe@WHf1}g{)Enx%1YI}7MQ|rQuedMC34vH#TI2(x7DZY)-uXw$ft9!%fO{c zW0G)nWo%G(giM*Jtn9BVUF+g#F#P6w?MWz`A&O`L!B*4eN^le_ zrPXEg*s+@-6V&b420l<{pxy3@tL z-tFCDY#KR&;Rx2JleUiSpnntgjP*?#W&9}FtiJYQd*ug+X1XR5dixsTWXeQlD~o~U zT3|V8s|_`pYjGv|#ctm1py&`jcDQ&R&{ziVy+*sIWC;j0+0A4D;mEtG>B@kxoZSwR za8}9aaEl+d=WQgq&$mSy)EX7ri~$4x+Om(n zBxii*$mL4YWudWfBrA+BKH1!V_GAYLZ&_o7vC-EsGRBfkosI{?G}hVXYx*p?aC9qM z;jXXA*ODa{IwOs=MUT0`OXzeeM#Mkf&P21)hefC$^^$^;F8wIkWDo` zaP%^6s!6JBLxN1RriHtHR%~#R?ttVr*v#$YohL ziV;e0U29jSxL6BESY^>!X@iw|WQxS6syG&g=IggtpF^HhlXNW{N$It{KcCd+x||3{ z@%?2j{Q9-(&_Pl9XW6|j+wgJzd)?fjgXoj}nBow{iB;EuUwyODr&2aUDqmrIZTdS` zAk?+ANq_n%S16p!>P#7q;x~0KXM(@ux9HG8-&H<0khu&Pe61Zi=+)-|vUwtzuIpyn zYjuW1|C55RHt149zEC1KWu>t_uP?d5gg@DyMwd)+xf=IM*h(jyk;>P+YjlqI`tWP@ z%9#cnS(jAV23!i#wM|;7WJtt@tziD&B&BjPBw}G1ooUMF-(sdg)9RpE{tUud7`65$XtgaMtgXrC}`By60a^c86up)$wMkLgYut__%IqPc~ zqeuTHXLZnnv3k5Fc+U7*oKH^9G=Q(a^{fu^2w!t1LOXh-O&grmLEkT%69BOsAbj86 ztPXG-1&D{cP@+#u_72T(Zh$7>+Ov`eK{${WCzq8HJ*$Igg#Y-Gp>7!_e6+<$XMFG0EoD?^ zeb``EN*Tg}S!(04Bp)eXd$3lfkgwarDj`!ma9*sfM#%WS)~8%ykgUby^llVZPX|LY zNw`+nCamtBhZWs-eP%728x(r{jsRcdgB5!`eKNe6FOA#ztR-&5$blz3P)84d%Vq zl-?w)KH1^YFvf53wWK_`P}h1sTfZ9Tt8c;vUv<#Ge~%*=>9Q1MMR0txy;mLdCV%58srW0EnEg)#^R@0Ycpg%KzroUu6l1 z$3{q(aW-1rL6Y`_QKa?dY_qz242}J7vbuvNM7np2)g1&W4xg^y)g9Cm$*t@IckI|; zWiVZn*c^LV7FWWL!uG2Fa|0PGLctHd_FiQ(gx3DqJ|Sd?NQ6+<=I##qkF~u~cYC3+ 
zOm{01=j$fjV*qk=FNLgKWfuBc2OePxr_V5SR5Z zsP4zJdEe-C2R#bCyY|mHohKG}F)X#fayoU+G`JPn;`FZ59YlT3zli>8gzz;uy(Lp7 zGMnM(V=YX3&e`7d?#LI5eD6bHd+G2oMPkOU)!#FuQswLB(ko?&=)oAj&PFGN(^-P4 zZk<2dl=Y>{y4bK+g#V?xGsOjivJ+84!7!`{!-w5kx_pw>2Bly3BjJ{{+mk*eODq?= zJl34vK00lNh)`Bi`&*6Pk|!5z@<{!g$4-__5gf_JxTyR_qdVy3l+O}No6#Kv|Ix>g ziHN$#ZZdjLo=|oo#%GEL>|U3X!DgMUXUE794e8hUVtmmZL}i4PCz6X%ce-%&xf+*s z;O9}cZ?Db`WIEHsjnP}}3ZK`<5lJMg?Ecw})+r@ZB>KP2b_da}P1`Lj?gYyT-J5KG zmP9@$>=xS{`mHK^z321R?eX!%@-xx~JN{#eOaJxBs!3yE_bI)-b_cybT6vOX)b|TtW76NEpVjpWs}g~ z=JG6XHEB>Uwg`Pnc7WhWdTZzoqLT}~mleA~=na`-;<9(caxgrje|yj!1f^5T34p|| zv^|6#9b5_mz1G+2^QT;)NSB>p+O_#}2Q_Sv^m%P=FmX9ngd_SYblPpYgv{XL(PY`J zs=L7Rp*Q*5LGr!#Z1K5+=6tUc1%dEeKX=e~l#(HGK5Kch!RHQo)1|zC@gZ#QdDDK? zWqaALGQ>h1!iKWdrw;l^A>@n2zxh)K`GhrcgNC(HDy?)`3^=0x)IsyBe7;bk+T_!h zJh?>F%Hpe!w)k{XmRPRLn-SH9mEjs|)7Jl_U+Qu+{~CPxo+&0ASP@EJdrzPHH)V>$ zYxu2JI-Mszoz8~*0I8-rQ-0G)iyOse>Z* z_V~O_r|lduU%GtqVj!HP%Q}3EPU}Ur*VTCQTf0vyAJPV$K5gjjIsL8?#@F4RzRDB% zDwLGY$&pKals2alLQXbyu5Fakt{uyTwK^cKC!$CAs` zl)lFoN)&5H@Q0$N1+E=sgY?a3NJIqxx{^h~4?n*4X#aDh;!tnzwAu+yD(xikNxj<2 zP8gjuO2#}?l}ExpCykUNl8N#}@7LXAed%0bpbxeF$DIUvA?pNiwn!4vanfSqjI_=2uJ=ZZBu%U zRCj!DQqrajX%HQDi;_A>LMZFTlV&XTQ`rnl zIopfWLDMIF+(0!^*}iE%8JjwY#*WR91`Z!=-qb-)_WX-+c^3M8Q|U~Z(0u&LQaHZZ zO*V~_G1vWAIC@`Ow%F7`-5;MJmFgcVyLD6eFC~<)VMnHTu&9G7gcEWCAphD!+07A` z+Ef#!!Wkh;E>&eYbP1_!NaN&eIF^H9*fO>Wse|gP4L?!`MM&o}<^kJOQ;U!~h&HA5 zIV)r~qE1>{Ln5r|UY3PxjjusSUC#Jk+k+HRhSZ)BKKh)-kM6BL>L4m*tvsM%dt^4ub9unPT%`hGoh3+I!SNU8Pf|SgMO*slON7 zUo74)z15%Z3~`@Tx>Nbp-GJbT`ZqeUM2DAxkA9o~zA{8)Qdwz}@ATtKNGV?|5DZ&+ z>aBmi=E$W0$v`&0)=Is>pY$;V(tk>SQpgY%jx07+EQT3r`#K%;CX~+=jg6>rMsKyw z2_vl1Iv?eUg(HvHtv=R)J$9p2z8D{DvrY&7(>Og0vd)+M0P$EMq>Op+*ofBk`#BS- zCJzEQ>et^MYlHPccb-TnNtz`1HLTuV_4gd{4r}b`kn6VUJz2srCaqL9_c*7U^`5*E%tDQp%7Bj;Ms+@EfEPPKG=>9W-I3 z)PW^Q*7CE$%9A?hZ`@0)viY2qayHY#GF*0?jn3JT9WK%z^g7$DaMsOaug`={&Iz9( z4Kn&Vwm7GQ>XemU4AR%S9jsXGb+5}V&<5v>5BhxV%{im3DhSPPXL{h-usx-aBN(Ug zfnklaRXH6L-5WB-GIcObzt-k5RtP!MAiEpsJvr&)Or&G&bXimbKde66l+!^|KBQc^ 
z?1KxNl+!`apMI3nK@V$;`~abl&en1|sJ?G0<5$^W{pJY9AB5eWoDOP9t7M9$dTi+( z+e7(ea|4-v^kJ)UPRbEoXZMsc&RU_v>W#`-DMLg!A7!21W}Fi3hP27p@Eta!Eyn4f zpWl-iTqLBP2Oii4+)iifI348EXXOe-Q{4;_#93LHGMR4gNZwJECX6)o$bNtAPW7RArYswwtv$>Gs@_E@hMw<5<-?(IJy>{ zw0?6LJ9==vK5X=9w}VSTWSeg~NYaPR6c3X+M=+B1t1Z4+>obIfqVC3ZSqGNit>1J| zb=q&v2|rf(@=XUl`0Y&buyGfdHf{gXLH+HLDJHVYvN*6JJl~Yv>O)yg7M28c*#~|a zn?LQ!6j^qw%d;?D-YAS-nPR!_!}lU=^QD6zt?}Jk-B_D^>7X}XSGHU}6lF1N&TsMM zn<^{AQKfat*!pK>2}m_nDq({!=bP=le6Z<@|Gxd3kR#&zthG+~xPgrH*pW7`KN(}` z8f&9fI*7WIkttKvkJfIpN(cSVo}Uin$&be-t8~zV6;5stHH{rzwpe95Ln2ufXV~5N zWYc9$*JE~3rNY3)5hX}gL6WS3gm7gzxGt)!=}HxprC|(&D-{MV3K^g5W_+@W1PMBx zDq;+TU6kMqzzI023M#YNFV?i<}eVR zm{1yASe#)~WqLdi6dE8Isk#-K3QCeHICGgan0PMUxD|6W_OgPNFl*qce9zUtYULz*K;XwP!L!oAQ&Vj5tNl8u0lbo zCcdu5y{wCfx)zpV%L!DT1TtO#02biK!%0LO4vriX3*_REfkZeM196#yn<2OajuH@V zi*;#@A<0uOv8o-y6O;iTP9}5R}b1!yqKb5s8Yi+oiY=>`U`C^P!ma)mv zu`~(_bT$W2z)B8wOt!N*Ky{~%wUQ$N4&afXVIerc0Z_wY2h+uc2a{}uhQ;h0oXv#; z2M!!qM~oZ}95`^`h+yR4Y%Z>(v9Jz=49cQoFAat%J7t%RsI0~G$tKN$1|Sxf(Yaa0 zCL>zF2o)LAWog{$%I?LFhqE%~n|Uq^uCN6zhog+LSY; z-^(()r?3DgBv6FQax@7NIKTl@>L@T_yE}!vfE^4D2b_Q%*p4K1=-tR*AXDl*F09AH zJ}&Fxv4GQ;Z~?5crn{*uN8{jPXl#a6)`U$GcP{HmvvFG z*;oY)BW$W^f~icHnJ*KE%mf38Fjw46<2n*~|NWk6LWXypC1P7S!#$pWc z6fSlb98p$7;R0b2!AvR; zh=?0N?DjlxuZp|5ZfAkV0u`3UL|MtgQrWbL-2ve1!7vKni$G%1Fk$A4F;Q%>WOev* zvdeKdn8FGh7KO2ufN)usG+7#!@5JN*!oYyYB5<-zb*T!3%d)5o1yEcdY?&Rx$6gmj za5jMi7z2TD!bi{n4tSNt$)Ge)7B-Uwga?rDKu`dIGZ;uD!;+XZOfnZrR!vzoB_M2A zRg@rs1HK1*kM|xrZe=}SPBv`}fU+7_!sAxl%t{xC1n|T{$=r&^!zIiIGo}X>8B+or zjwpeOAOdz_lpsL@96$nm(1@S_0s`|_r&Gp;F98${Of+C1gONZ$1jNNsd0;?sXkeH~ zA`1`)!j)kb9*o(_T;||r*WKK#nwVmCoyS9gb2xYbXOzX5gINyW%K-&znE{K1;&5;| zjwm5Y$S8|N(!H4G0GWe(0EC-S7LzW5Lc?h}n5>ab*$piR<7i49mM$w*HoKcivix{F zT;+K@E-VGioL%f*8dm3CoN)rPgPfsQD3!`uO!t#bnhONw!T>{r1PKxpK)BcCT+fxU zH^Zt4^LVTeR!y9MkWJNeQCXS=Dyy;!ETC{vS(*e1iew@IalkmPNn#9yvKot;F8Fa# zSsDS@%u1MtBdg*INzR@IKi@O%_vTrOR$$XspF`IgCx5fS{mKVc_Dq 
zEH1`?V?kzBnN?L>OckBW$uv`DRs5l(>Q2`MTgIjY>!P|DUn;}85%Cz~`93>DW!1v@k_je^4g4u~8M1t4VDttw9fmQfz;UI>>}L0OiC0)HfXFUm4D zVc6AZJnT4`&ESh+FD?-Gs+byJA}m=8wv1U0HC;&7Lb}y-anV%Q&EV^1*|oTChIuGW z0)`+VL7FTC2e!je1pxwOHL{B_y9Enefx6cG>+hX$tM`*=Ja3L=NXrkXM`CJYQa5I7ucGb?#KDisDUjG-_CML_~D zNWd6Y#nj;lA+s5#z~jLPkQ*W}E^C9G0s%M-gqkiYG_VZ28knX+vKI9KrjSk5v;g%0 zwg8r4_hy_;m4_t~NM?eWNbmxjp^7R80=9JRi!!T3A;Gdn^hEFW`m?1;YRSUY+8_EFK0u#oxaexJ~%gV^`U|u80 zuo1~bVgU^+S=^0^1*}HFT91NsI|`qRVPPq&!SDk7xvqw#!6wN>V&X6{xv)e6$sF8_ zlVPuoy=+FugJn>-b1$osnMhDDSOjG&e1R-bDr+%S)?%ux#dLuzP%bPr+0NvXO`141_5HhzL00W5L&zE(?JZ z5GppT>vJ&}fiVz<*y9mJpNmoPxol^;GMHV2Q5P2;j#JhO2HEVOF%F%b}wef zF2V6-VwqGX6c-3%Oxef7CGepxtOT5b1ZEc{NZ@c_2po?c+CQgBZ1Q$7&6DkN;0Zw3q4lBTozyp8?%nC3C0WV+*0?te@6a>r; zo@5w^Od6OtAWN0y$JMR^TFsLIMa9K#%}O z96Z5LAQH^X?lN)6a6-iH@nClDLTQ*Rur+1&Vs;U;L#WV9*X?+!ipr|E*kn}|l4LJ7 zSr-gvATAjmhSJ48m#rr0g5bx49~XdGa)B_r1?&{qy#gd4Fb3lIc5GF_F2XQNX*Tz& zxRiy>m>ojJ#8`$+4lZjZ3`k@c4;EtsM#-^;qV*H!3P7+-0+ zu7>ro9rkc#IRS>D;bbQ&zWVlj+D2Pxh3k*cC1ww-10>PCEgs(JV6DRvrmM!VJn5SZZ9}kyd*TjI~$HP6G zQ-H}}C=E;)DC{`9m?XAbO_?Wyt&Fb)TN!7S-Qc_NcsK_Zumc{xm1R|x)jS@~JsvKB zIs9Nl1YgW!HP?a>m=$1tJX`|C?xd-%%O=kQ3t)vyO&5$!7R6S^2~<{PIRU;H18i|L zcL9%w7iKUB)tI6`?5n+_=)B~8p<55`^^zXgM5)cyp-QE5_vc&Qv-2SvO zWum$nmeu^{tF_K=^xr|#QX_oX=Bz#$5}7edKnP)z|6pW?i&xczf#DjT>=s{6ZXk0p z9cQ=xpZ#bz`0pU<4JTi&tGVv0?R|fgv_DdH{o7ZEmb#t??;Fxq>rdy&{Q0%HLb>th zY}6}!oQd?sm?C_>HtRp+2Z$^B`)jg(*9gKE>(|H^isyZ7T|ZK+nlLgf2bOa>ZLq%l zkN(=LZ)J2T+xxdtm!m9|RXL6xOAjXtXn<N3f_C%<=}+efM*n4dw6dCAvd_#tL00|kOwp?cGSszP4bfh*yR@h+tSs7yKr?0*E;$+AK#$`zg+t*0>^3>sXtNqRm zWU4o>(Wj&mzDE0NWeG*f7`@F%J+cHdT@a3_eAS!muT1%LAY7x*7W*AkTl)W-FHZRy z>~~sydV4a?%9F}g9=?N~Lt2?)0@7H$Rrn67?-26DB0;Y7J~oH%lp*i=JBEkVT1AC zbA|!zQ&}0N(7QeO4(j2&@1TA-<;Ae9H{U;brwr^;)YS0aL9_c+mRKS= zr(`i;kG1_*`CNh2kF~~Dzk1~fHdUVctIfYUD9ZYkGnNdSEF68V|Jdl)N@JAIpOf{^ z+2l`$kR=q4F@D$A@$!6?kuerdhQ{*Xd>vT=lHrfBy|3>q!PtpdYx`QACzq(@o2}M5 z8A97*_3EJiZpsWAtv;V273;#Hzcy#=$B+oS)59ifl#eeKj-LEit9+cX&?sBi`B(zN 
z(bagUjQtJP>LB?qCw8m(xetDOwa)3!<+t|=Cv`2=Yy7;P-Q25#n!Z<_T%cD6)t7H} zeQkPmkk_w}ArYPFsW<7>R#t>#(-xx)@Y`~D*hy0@$%EFmF6_ajZiB~z+O=7#EUZmg zOnWsW<8m;M&wbcbo#~-XZwEcdriIX{=e>|J=YNKupIaj83DgA=pXq5X9? ztc6EqVH;M`s)P|o@{xJ03{!RN_0XcMJ~pCthQ}lE_y{X&X|>WkLI)NDR!tb!jVuX{ zZsySuUX6`t-?bmy>O(urCW+b5K@3ecj152Cu;JtNqdn|cPNdb!v>{P;fg{Wzv~K0P z7SsyFG(fn%TuV#{nT0h`_Y(sl5RW`aTHF-9W zR;9~o@J(eYT4l4!Mh`BVflZ=Ei?tJ}L~zY6yHVdC)gAP3->@Vum1R@KGts6u=?>!P zbE|tdFfF>3Ze_Ff%62~2QrXH=sqAH093GWY2R$katFFd%SsBZ6-D+benxxZ)!$Z2P z3&~y!kNd!0nMY-(gFd#-NTtG!&aAo~28&skM`dN12gA`&cY>wLMm-(0sjSK)!%~$; z@~CX*x(@AeA`fg+y>yUN+34Y!cuW~RFs-G79+G7_MD583X0;ldN2#(=cG-=aVIED! z2+sqUZNQO(>5{7J@Zn^a)o3gYMvLnpNz(&Mm9?-Dj=pP8mF8Vp*VV8rhsSIOJvx>X z5k1;LkIKrZsbd{J_G)-wmwN3WENTxiE2}KVGx5MM%kq%23O}%$*@nl1dsz(%!jZmoN!uH@h2&*j2b*qE0hf@bVI1z0+2)pdfz&vD&4$AOYR|8iU!#o|#O0;&6rlPR* zv91f!po1Q-Y0GITi}QbZB+dUD{WfL%)(!skV7}Gbyw)n~cdP_yvDW|hpDLa1u@RTI zzyF82U+G&C&FNG0CH+X7KBV;LOU(DopZ^Z}$obh_z__gCpGr=R|L>sx%SNV$_q>Cq zuFYw`gDB5`*8h)TFG+Lw4yxdUFX20gq9z9LF?+%P_pQGFe}66NC+Xi`;;Y8aub+qi zdM&hCEe0EpMNuA&#?oZZ~p%O z?jT=i{SCgi_my%^KKHSEiAmz=rj7o&{&#`9y_-0M%x{f*$H=0wb2%q&9LcufbJmZO|}Sq146eP zOncD(`E&ZbvYhNLOcnp9wEEv@lT>_)Y5g>N>LAXhy{Gh223{Mjvo=|4rLewQ*+!jO zSEfy;(y4(425P^*+$SX@bSN-4cl(pzbfHl>iZpObzb+MHBF-b+zdWKP;%QifI~ zrII=*Yf-kOP(Dc`bx@>JB~k}b4=qMoU8s3Q(oBPow)bde zY3lpWpBS1+qQ&o@m?hERPm7KfX@4ySv`&wWs8bc|bWojDWit)d>JM&pI}-zE(56o+ z;&WE%UY1@~ris+j*?!JTRW`%4>T}-8Y;(?OP6ut=X)UeibkOFq4L%j5z2sz;M5}T( zm(yPMp;6nUoDQ-oElMl1Fs=t`ENV1&B ztSU=`f;a93&S__5Wg3LD`gk1LgR}ZntlQvkI;i?g%+uhT4%#$eq9*>2VfUqjZdF;0 zD|zX`s&r7tMpS7{63uCq7n`apsnXK8SEhB9RaW_--em{Us7nX=gPUPhsH(CLJQ$Y_ zqMVFQlEt7+DqEx$MNN=O2g$EfQWRv$iET{jpqH&mmy*gUrHx3A6&sDxL2X7Eqiir@ zSO%@O^hP^2(=xVf-R-rsS3~>CayA~@pj9Y86h)zI63P}~YbdJ^HC(?98;C#P%D z=9BVC2R$hpd>3P~VD-H-VVe9X(fS+wco*JB2h~_vm?P+XIX`EEj*qTlq`smMAXT-3}V754WMh8`w zb-7ofz3f;CBBN|mQZ1u{s!yx2d5jLam_8SyV#IW8Fvb_-#n8TMM+bc}2X>)(bPz^I zi%peouscpwWkuS<*c`@2+k_E*D9Unx@dCzAs~gQ+k=Nh6(Kdf_XtG5+aWX8- zTMX@e`yF<>VNrw)K-rAvqET)F1Rg$cX 
z-B9|Ov`N|`&9Zx4jY(_zDE!%~MQ1iS;dHe)v&%*roDxcQLtRO$5;iKKjM7%qmDz{Z z5<&?j{0Li(ltp1b5*wcmq)ChjVT7?I%d#}@Pn)uvqp3cX1{*rw7%=&{#{Q%^Rf-(pXF4UK4gD)S4wu9_- zr^{n2ZFZCGpv*?J*be$gv~EwOy{0Ax&EdwqG@5Z)NR!Y(lFcv&mWG-pvdg+?@1SbK z%35{MWm%d#2%ECY6KT{zZ93?hJR6OrNe5L{)}n(xm#4LZq{>naI%wmz2W6b@EuX`% zTRk$LY?2-ob`%Q;Gzw@Cs;T3cV9fWjR>len3KwJGvMv^IWi^t;$!-V3E{0kDWSGaw=u8=%n`M@(Pln;4tj2^f zunB>Al7UDj8Hgmak_1EG0TJMvSxMqZVu>*jmMV{Bk$}vD49gN6-~b0h2Lca(6C_BW ztOiFJH^UG#mFLPb3_wI!AQ>$4OjUJ1mnW;PCc2WuvNRX7F3r^0RMUnU777X9{0MO21thDn_X4|Y==LU!o`t;;w-GPri=`GT~2m+F4(dv zjyjxyxLiyV2?Yutin5-{cGzQ4u!3}9Dez@!F3zxtVyUv2Wz6c_4Yru$$&f4u%WPIz z5+{5pgh&B~AgVw}B#;2#4c@Rg7y`|dRhN@_Y-Uv%1yK|^5)g0!9*h8i5h!5kT9;+I zHXe^FyOP!6Pj!J!*CLxZk|21l>2fmcf-%R1BXVRoBFDv&3nU@Iu|S%vjXP;v5=)iM zurjO+;|q&3EXSOHKtW(9>S6}U1XGz{69!UY48nscTpp~l9K3)N92MBafJDa1JQWq$ zWiw1d;>fTrX#pb`fDTDn2NEU~2novp{!j_n zJsxa7Sw`n-SQgeTV2*Sh6n;FU+o@m|u|vQn49s>g%TWL`sZbdBLg5T%l3|GiVv%GH zv#gLIu;x18Y&_P*$ryvOj!4=}*aanl1UTYg%9=DPD9Te+WLEY7_JAXDaD+fTfML`F z7)Ox<0Ro0Nn1PajsRuCX0Zf7uE;<}JFa-!MIuzxp5Fp@tU36~8RuT$Cf&{l?tBa0g zB7xz|1Y<8MYy~z|Ty&V7oi1OCi;hH19t)}RSQm`h4Yn$Zs%~d9VVLC#0x+z(?nGVo z#tSO~0-|y$P!Mng1Vkm`izOf!_@XFVaak7?*wwH)S5sM>*~`f~6$4Izi(n>_$WfGa zZhoDgLg#5@h) zjC(Qo@o)(aQ@}X|SR+}QhjdvED^QpOL;xHNkzol)g<_##7=al$0YgY2GR%+10?9=| zqd-v@Z!$1A!<++~RUeGAnK%!!BLEHs41~^fWeOA~O%@)k0Tz}5g-I?Dm*9vT6&nNv z_qYI@(D76Vynquro(h345G?V5!AF3{)V>j6%RfWn)H5^hhY0!|F-JV1*>L>%Z>w9s3 zqr>2sq+2Q2V9Y(>tw5AX%`sGwA<%yK(&+TyYZ!28(jgdtC}eH1(%pjfa)d$@HqR-W zeN~W9+0+N|;_=QhE&Bm3*AN6Aja{p=e59@EjZ%_A zSr!rk(b7<9iVnZfY}kgR}^Az13xJ)D5)e0b)$Gzhs?|CXQ<{M zjvnAAw{Q!#&eQrwF^^0JCV^$6vAq(X=BzQTQT!bRW-l5o08n)fpbR)|?O)Jn?hM!i z0F45T&xv&@QI4k8r9lA}EBiCEE2&rD(c=^%2$5NVZR6V|7gX<^FEPsI~3 z^XOCOu5~mF!QhqJ3Mm|MpOfd2OtU*TgCys}lf<0dhYG+X;Uu7)q0GQ~0f3lmUyyiB zQ?OVcMlVP}piQaLud#JG|0a=KAdE;ebQ(%o-k6Gb^?pFA;`p2M0LG{CAu~EgN zJ7G@I_W*4F+Ke>QFbh&}Rc6=?Kgt49x-v)x@HC(nV{pHZ`Y0wo6e7L0TB+pLeZnM07xpFb}tFmmD^7}uHy7)JZTYF^kLv$sqnl*jmSz4FzFuN`8WP7#xt)zhz&AmkDro?|pom 
zN>zl`pMn7C3?R?dUlnBqBTJ`LcDo_0xV-g0aztVb>6;C}sCLxsi>nWPBW{uaQ0^fK z0eFu&=9}C^U(2&!HSpG?WrKjy#SCAI5E2JNHo#jwE#O2(m@BE-_lO5SEeZ=Kb*2>( zwh@Exz?U`mPwB+{iKXaI(3sLddvolGvLz1D(nHR!O4itK3PdW%`osb`A&4qZ$t(g6 z>f@c<$Q}kkX9)4HHQTWeibcF{g)cZS@)IOL44Ol!9nplm9f^rWZ4dt;JN^tIZvxK9 zH~~YlH1S&H5}7q1qWJR384NJ?7}#RINH8Ap>Ua;?q4~X;*nz+8r!!ZLKAn`Q_@F-U z&=m#SI1AQk;crl8==+^y)M&OlDsKB`6!n8-O`1vZ8YcRbvO0n1pCKWKeFZ3|w*icj zm2+i|iLY}*T<%TxC>R=lKXC7TKtL}_(^y|Dl*Je-0Sud|qXK4iTHVjqK2=n>?CGZ) zya=bfYOMT)?jJWCo+($jLkDhYPd;h(ItdbhMwMN!Bu;mV0u@^6%VF^?4*3NDmSdxFT@9`nlSX)tbbhp(*x0}k>NH^@zsQ>mht-x^K2e_{dH zEY3ss2ysC-BG$q5xldH|ukXW>b+oQk1FSKEWfQou?FdyTg{sacf6NSWt0>(S6lj@U zpD1tH66T^6-AhI3M1e%HuLY?#IO$J==QC(;LKRedD@HFb0gV%dljug9i@{>uEo}^q zf43V5rf^cg8^Ccs%Lu?Ah_5A;bO~@xuI=6f$@gyzI_4gH)J_(F;95nRs1|oLA#fVn zrc2~F>Ri)w7nY)$F-|D4Br8OKTFEf<1EdliJ#HiABR0zcRUj)?JR=j>hT!U zpzWgSYBNuKYlw?p1?;#n-_0YRmc#Ra=hvl#BfiGm^PY%HM~Ol`WS2>gTL#4Pht!{; zGb2SKVcs&x_tCn%hpB{gTVCoSr;i2bT@mvR_wR@iCc&GfmgRW&hHzeXm_PWg`cBpN z1><%{B~mUP^KHrsvjaDd5THw4DBE^_QbRcDg($UDY=1k1U4Jy40gI?1T(%syAQu-a z+_Kkbih|BUt~<}!ik>^&#ge`^ffw=u11CyB`cg|moD}2@9Qlfnp3a1Px~#(#B&}5G zkqRH=@NK^u!p!u(a^h|f-V&03s!j#rwz>4HoeEHNm4nxTz(z1Y-WMkQ!uwHTK&;te z5Jb5X=R_jd)dMjJ?STXFr2C+Oh6Kq3Vh*R_`EemSbIoiNMJ~r)WU9jf5Gh?jO3L*d zs|p~Nz?I}*%&Is?qW@SUtT>qW`n{~8PqxGj^lSi#+dJdn&GqLK`2YfUACI0BHnHE2 zZde4g8IJ3>fZOe8(KDCfeQ!g>3ABEbOwGl98=GXE3vm_ip&to)Bd#3=cfkf3vU2_8 z@tDB|b^PY6J(|#up{OH{bz}P(GV@tO{cC04PX(Bz3(v`$%c4l4g=pMXOHsIP@c{gg zfihKBQ@P0m<@_1K3tmlNebs3CK|y>Dr4G$!HQD^Mt{3DV{0p2QZDn89FQh+&f)#*= zpdRF_gjkuvYcJ_@<&a-CDElR9*v8(xz{fP{Bvu9!_$0Z5bonrfB!wrn&^@&|EQF9S zdbn97Qh}mCy>Id2eWzDDNwr-Wkae0R8+XG9%72%u3U$OMrX0p(B%-a&I?m|14f_OJ zB(kR%)_RAev(*w@qZ;rr8i66gA`9k*BYjitI{c@vIkK44H&L7uEuoa%k?mCT-5a8l zN1wIYn7>E$LeN2WuGDf`xf8o#i(^M6G;%ph)tpd^+9no>Kc6GRY|ipo?<~?TprTed zl3;Fa(~5G`$DmgPmLWo{2vjVh_8mtI7XU`yRJnax>)6&?`=pq%FP-Av6T6tqplXz~ zEDs&Aq(~#2R&k?J2^rWg7EQMy)(ug0=6L4ElkX$20=_KIT)0^q1aIqRATzFIK-Q~l z3yY~;GAvRjI$-7U?N}M%ad~Pg7fSv6-tUO$GGXBZEHeMA@aq$XVnOIh{25p^zNh)b 
zmnmP6mF>)F`V(YAaR9&Xvhk-RaWf;Jl? zSTs|63>hCPt_F~8t_r$x=^jR2=xe%F$dvvq~+}yRh02q<=k2ne^-xJ zO{tEq|VC`SH` zlUDGmhj%5hsZ_^CJRQB8;vz?|A^qXl6#V5tKFj^^x2)Bp{adBt25WPn^Bwz#3DeX~^Ww&wZyMhg&@?w)Kcu)z@@kY2vY0iHuPE2#X8k;Cz3xqPa{1Fq6Tj%BE`0Snezf8@@Lf$RlNSLvv1R#HTWE<+ z4Tb17we+`~^DF16nkAgR1p-~~=+{3Io!nt0@~C~KHZ0NMC)zVd;Ksn_Vicd1L`p!P zOM{~uCAGYA$nfmrS!~0QF1^@$4Q+}PqELz9&Cmy+S(;FKTY{6Zx{>Z#tlatTX)eb4 z;6)4iXceFf84HoYEMUe?&pP!4B}aUs_^C45gV}Oha;*H2WDnOdPNOha9)*_e$n@Nh zkHI)a2(HB%{u&iaSRqRddBBL%Y>(E+OdYY*qQx?+vXhvo&Lq2k^Vi`(!z+6Hn(U*X z`$u<^`F( zF&%%GnQ>)jVMDWG5%n4TJ0B}gT!qexK2cla(*n04wZpWt!*nGhb~+nn1T`%0ir&sP zC{JQHIa&v@!L2DF^rrl)T?vK~Uhq_H$phZaNu9^fG0lQec_YdOKmXXShT6$COYsco&Q~zr;%Mu#tdT`;ns!~GT zi+*vO-Ops9FhrC#3<^p7z{@6GQ<_0%`yCWJ^4cb_L2pp@F&SRa;e#Bo!T4>`d3tRv zb%9T}lP0VkG-?)yQ~fc!8&IC1B+_?C%3FIIZvUbZwuPf-MXpf?OxnwhhJX9E+&8*;SN$w`0xD>gZk@zqkwEzq?QL9VdX}b}|e<=^XTSo&I!V*>05XSQcE(bnZM&cH%aM$+2}^v!zrnSX_@kfdgy=#x0>zjVunFcn;&~6uPCh+P` zR`58(WUrZeLFedl>;Tq5%lT#BdbznX^+qebK5}lBz%|Gq4P=nXJ+_Cm@p+8WoRd~c zd@dlrqw-Ij!TMgs5Lk3nEK%ce1FK1^hL1}3`6E-n+-Q55K7L?Zl3Bz=O%L|!gZHv8b}HoL=j&(MUkyg^LIRYCxzKpJsh8nZGWCTn4j zi1UEo4u}>DVWMYvbz97j6}EiqIsf?;h5xz2j)UJ@ZTHlkT|;#&inrfA$OrK^em|j( zP6_Y$dxqdV^=qO{jG(nDpO;qmquFuoqq^e^P8qC8{ulL`z6g>u;M#h z`M*6FiRuVIQ8AtS7{DX2)tVr1sYPc%KnQ{ltFJsEA}vS28vZ&cyfAIBJ2;0vI=ga0 zQJ^w=(!H4le)xjamhZXvEd_$D4!7yKeU?x|y1y}QL~cw)*HGtx1Za?-F~O1w;w;Jy zt3;kwC!t6zD7w3mY?6Ho@)|f7U}~m#Rlr%@Vz6u3(woSE7|5KOTKzDdUK=&s;GbnT zv$CKI+BZAc%?c{)mG(DrGFDO;!gXMC4M}RutnEl?7){-g%?(5~K}ye%demHv+%4k& z1~|6c4$6a0I3w0b-ft?#$7+Jp{cxd)3R6$gS7yHuu>%_xif|}mp)SnAYDUd+6sg$h zRXMxD~N`6Y)v`2sC7xXznZ62u^IdhPeGgpK@w`%g&a$ zv5?Rs;}A7wGj5orJOi4NG&T0IumqNvJX8Yt=T^_)l;6G=2MWcKlq$t}fAfy}EpDnw z2t-<$D*aU}sC5TGs{#SK+y$stE0Pm&X&-ZsKesv(Ov+EqOTi?)g8-#eR**y?qA}O< zuDvX^r6YyY(FknfX=mFcEQ#6xisvrGJn0xOg4L5TbC|UNs-p4JRg6y&t?BJRO{M|Y zw}F}2@!0_oMQ$^t>Q)S_hjsCa{9x!PbZxUz<@z#fY~qiAnu`DZ2R&rG?wx1bYeZDeEZx_c=OgT(*v|mcY*8`8Qw$a3&rfufWBV5)K0SI(vMnUW z+T=Rs?I2{YX5_pJv=d&C#hugIGe=7jN5er)BoF{jB2 
z5Pbx&Ga&KRqUiuNaXk7RoLlK+1Xz@w*nc2g6bfGnz|SxOgjR4imk{KJkIGp}^9v_| zg6_c}L>^2hS=SlR0;*{wN3Ucwn>N6tO1S5dT`;69RFY_H<@P+HAIWIG<0955CaFvN zK#g)*q6_#S*%R?jftn@3u<4%9dm8{PEaa)u7du$bq&Tmk%t~TCaH&ZXG+1=%B+^Q$ zlRy}i06poKs02)p?vphOaq_R{wMWe=qc7u>Lt;jXl0|*y|9H$_EXqa!UmoRn9h13m z+icc>*Z&{^orxOsYVdr*i`?fJp`K`q$F@Q;FUH7%aqFxM1$j&miFV~$r9umRtU5P)!Gbf4`m}#(m?q+eE zmAj_dUN!k#yX4sxn|Uko4ll2uK0Gd{zO*tbS*#^LJRwxkzV1o<4K%!#X4y_me8j|m z#kT$kD4>LxI%dr--9k>s{GFa~vYT&*!z>CQerY2%zdR!dyEUIe#1K&^0TRO=4)t_|xbH7?x^z91h^`O%LtL6Nt&+qg%Q ziC%Gydd~nR`B)4d*^Y8hefj+wo z=Q+^`KuJ{I_#7DwznMnqMj;tA)TS-WD%g^k*?`Oke++8Wpml3}-iZ02X!EklwC-+i zfD5Ako}<69h>V^t1Pe&+;?_r@h%pLt2$~h*n{hyp*5q$R1IQIE(JpqzN8UTDC;pb4 zD)@CRfWD$))y9iDXm?vNP0#GH{*VyOmGm8d-r>RAUm4R|&X0nVP$3Su zLIb*}bY$B+q_5IPGz0jvZOiNiSi!yyIbNl4mDTqFTVA@dGE$R-r|#@9Ho1hNqN}64 zCSA-hR2~A2J}Q{1;&fNCX$e?Keig&aI-Y%EJ=pLM4WJ=gtzxR4Hc39^IaP2Sw`Rh! zizs|}*@kG7Q&E*psEw!>UkjYVm^x^v63Bst0Kp;|*6mBQdbNmV9SN1F2O|R45i`_Vgn!#N z1ZH)W93uW|#<>#0{{@!KY?*BdVf0p|OkqWqP|t>xl~Mudja51z#HE9>QIB6v;XgIX z1x+QnFeu4Gx(97$hN6|`xblww-Vw^hAc$;%*e_XeL_Ro#k*&6LpKmTc18crCF)X(d z05T7=No*Y{@1uqhfMUpRnnGRLRS*cVkN`-_?q+fNqJyn+R2<8}1bdZHYOK|Q{(GkV zI(QvTAisW!XogcJ{z{eBE^5UfgyJY*H*ca^WpaleJT;==V6~eBN~Q#d%+i2F>?nu^ z>H{T)1-)tuQn3;Ia7RGwf+}HDSjD##+Zk!5NMQ%+UL!Ex^eIJ!j5o3iu4MvA;Q)KO zLYfbso#GOUzFBGsF+r?#3oa|@0EFO>!CF%>{ri{b5MM;|@N z5SB_%0A;bW1KGVMSa+vDkbcdA&b@_%e6!5M-Vy?l-n)-kbFRqsk8o z3EGPC!MUY!D2K__he?B(Vhm?xAPE2*=R50-nye8?_z0q7ON5xi{+R9|(&Y^(>9}&0`Sc+3``hrQUH4_O? 
zZOc4XwtQO!IRGg@*1sPq*?V8W{v!b(Z3%FK3*qgs#af8f+h6Gd>&2o&;Y7Y2=6=At zE#7Yx#tFTn^@|yPbX0WEO}}4o&H@75p;oShQ^tHkT%kU{O7kho!G>sxtPo=k{ZtVS zP!f~q_>~x-+))|^vuwG~q5<9(RA@?HMF=1_MF2u^IcbkzJ(r8|z%xRZSu}AfJX#_m zk>yX94N}1mg=(tc=34iz0x+|y;S;d?&$dS<*7SxF{Im`t9ePC+e2aXT^;Q(PM-?!^ zyKgKK+y`YN{AZweTm3@zF953#-kq|N!iZNiI{R8S-a~F3crwI46=Zty@Fp*)Pe!88 zAp8LHzBKnA#1S~f@lZPm5nJ zE^X2=sEk01e@DhQ<&VvxH=XZ?x&Q~W81!Ar%v3JsSl=3L0bhW92TUcbe6s@hQcWH> zf5j)C285tmrT?p69>0NN)M%DzkSOn7u#ayv5tzCdd-nO14&WU>LV?7ot-Jxe^S+0E zFW9_0VlHcPb%YPl-7hta?}#frI#BUEZPBr741+-~Rvx$bF*{)g3yy1yWQ9gu z5DU1KkBH;!+lc#4!(u7VcNWo;eT^*Yxc;o9DvVKbKRR zZsNM*ilhZZMED;or9oN8{CbeCc^El$A337Jr9k*@hCM5zJ%gMqK%bF=!kn~BBshwQ zHS9defuABT5k*d8*R)h7F-J6e1tZu;w#n^NNmmd z8C|U4gS-=XykLX}h$nIEF$-=50C_&#M+hl@5J_SQMs!Z3@f{YvcX$)=O4*Sd+Zp&> zW!b8fFTm9jkCj$U?cqOxM2@l!G11$B34LqYJ*lG$ZUZE>?lu4@5`ZfnT*UV%(Jawg zhqU6K->?9KJT#|EnoLLa1Fq%-zGEWM{=!Z5d2^o4@d;yWwBSmR4UUX(zVQ?4e;{U< zr^c|?HZ?KX7;t?u-B@u9b?Dn9a71YKy2x%+kOOcw{!ALz35Sj^$huTY!1GG*LkU}A zdfcMK5<$Ny;Nhgv+woPpMp%BuU7Azj(22-4QxPS&!VG7kP)B`g(*(?hUQb3D20=>F z^T3&GnkVgbMF^yac>^VK$#)U|fKflNTnJBQA5lb)7M@7~8r{m+Z1~SNF?m{+{-QAQVjW6o4^`k~!L85(+4f)NKobMZSa>yd0*=`SlBE#87 z48ED(ehbk6#HcW#bHIpN6L7M{(;IV)ncrDFq9bB<{aIyt;=tmBNdVlaDT)Pz#`&;+ zmYoh2RRU$HIzxHnkd+Ss_-S$wQq8 z+7WFGAv9R;Y8SYR7JomO{@CUm9HFLH0Wj#p#Y8i)zn%i0T(_k7tcDwXP(Z!>##Yy< zn=T3Ey>wei(YXsf&9qesU|dnLKo88;MT#gL4a7aVTAq=EM!bFdPSkHC1A0+b2C{yy z5{Nwi>KLz>QCM$lHG&*y3W-JVMs7<1lD(`L!CG=%z6`4N;M)m&_(?~M@)-O|3ddsPgp5+-@$Lh|55(O=(u2C!{ zS%<1AP2sPG$P#>_%yv7E?qHtO3=k{Q0S>BkQ3V<^OMi$u!msLO`^apB9{VL84tA9F z%eAF~5@lmOv+8^@>it5oCO&w+Fdv#%fk~o>pj-@QP~OK;eX@3cZB@EJ4&N_!8)W0q z%a%CWsxuuQzHS_xT74AHJt=6eV|FWYkcJ*}2MGwwish`+YmklN1`HFNcqmDI^ashq zB}8~q5H5!-a-jaoBvSnQEl}#+z9UM3c-j!7u-`pq)JHowY#^wcn7#@+BneFtD?lIw z7s#S0k{61(a+_rK7j1al6WMj5QZedU#{eu%o2WYS$`+$iUn<22;y|#mlCd6|DQ6(w zLjjc-!s$_lQ7<#{l_-81h@O?W;tYQ&7Fkahc zuPltMcP)fH#+pk@kXzI&17F8-nx)F&!ObX*&^rT`uN6}xnRA}Qw$Hctg2hp^NUTnZ zyqSvxvI+%lg10ISMAfL9_*mfxGdflrngmyrfoj4sSLw#&j?azQ7VQ2(6i^AP9#+Yq 
zG75$;YQ)imh&i$nn^3yHu)n;8ywaCkBzVYdG{gv$K~Fnr?8**DH8i^`ll{<`s+Hp>(!&hhqlC7Qlo)-9D}{^q9wPJR>EUPC>=CeYL7$x zGY7=SS9pY!F+QD<2GpM4-GqLd=P_P)0xHX25sEUZCLp-^FBv8v==0ropxQ6XO>Ap?Tg+vI}2uv2t z2(QFC&ihg5BWe z(}dYy!ORakjo3>Ykz3>I<*Qotn>$Ju*uw)QNkh%^ z+OpLf3HMeBBU|e0W554ZSYKt5nLmAe$kgwT#DL)t@7Rtbj2Y{-&inmJiXgiNN0G@P$VqcN2Rf}g_3~9lO2RJ|y6VqH@jL7ys zLAJ?^igt4P8^NNYT`0_nvb-f>m%Xz&n(1-1Zf5|@q?k}+z6t9y zs&VlmjsI;6Oe%X|*-R4_ti}oQ-$7BGV2OL~X?w)eyt>~}I?T0gx(8*#aU*_5WdZ3X zYD~pK1RCUM+hBWZZF)MUvdieG_Hn!z(0fGx5L%jWiy?qKx&>W`=p>tyA z2NiDO6YM|KyBZ*@&#}x?+UO zAQ;8Fn#1Fb*CSP_)r#78`O~3a%o|WG#R8`iwI%@3!V&XSRg_zND+WSohY7wHy@7ZhC_SMQ?!GCyt*Q0KTi(0&*CM`j1wLaWO!C!fT; z2hK(7RP^#pSDCo1Nv#nYqk}Z+)Nwp6QNOEsPoV+}AUFgY7M%_zG-4b&a60~BzreFR z(!ePt8j$3AJ?V8~L*V@oomSk96toGVVw+SEQwKI;Ak_Fc)KBB>7>4^Jz3_Mn34|(S ze^1$4_Kb~v!%~P5bYf0GyhIU@P>z$;m!6PNoQa?k#74g8Aj|rq4d{&Bq_w`N5_)*? zFqubGY^44bmM|~Zgp+od`}{bjG_6a7_Gz5!b9^C8ES-#oOxAQ%u2#nWfJ`m+zLV#Z z+?qV<M&idGP`v~h7^igO5JgF-i`1>6k~F0;P)5`W{xXxKlR){0 z>2SCfmKM)LB?6nxf_cKht4#0mG|OR09BNfE!brmWrzZX60HK4^PDu6XodX)J7AY;4 zF-<3`;9RGj#*0m;GVnW&bCPH{P=e=K0#p5YvCGh5#ia>?9@D@}Y#_}@ZrI&ohnJ~o} zU+8vCUqFQ+3~z>3Mw2y{Dy&pHhyJ0(*RsbCT5u(%G+H~=)zEUENZRfZ$3ak6FqZCn zc+}n<7^U$l23vO4y!e7%0r`oQRIFv6?L%2IenL*MtP-J=(>lEYiT0FVvA`z0jnXxR zdUGw#vk+XQ1POg*UjMMVL(e&q^Jk4q28svfDq08#i6>1~n$mlL?@3x&} zKGVvuI_C{Ue6ZBA=L72Hw($p_)>$Imc^=%dA;kg>3~5L$Bl--N1b72%k@gX>gqdix zA_qLdvKS{2fqwpGlh(!>-C7HNgdw zKjxYPSSQC*2+dW0tN`kNw z)E(FCql{)d5msa~L2iTG=`^>`_*m;25HwmNLEW{!gRsi*_d@1cDwbFgh6RW<`_whW zsvZ&4g<-B+JHJDm4iSsBRhv`A!z_981*<{xWc$mQSqB^LuE`pXeL8|pEk0_KjiZJG zE2rzOnC0=$g}?=@(nC1MkTEU!2wfK}X8 zlod%VpZ_06HsI+=sv>C4Bg&31EY}l}#P`vxC1f|JH25RNq99Nidk1}BB8Bl%u2n1Liq(swYhUM#ZG+hrK}!$*oX!TR;HYf$mlv@6LQ z{=9Ti3KkI-aEvC5>*$$y2_EPqE`2_Xxyy!4%f@kwZrt9D-PRUDn3DIQ1#P=wc}%4>U6ks$K0-m^xEg8*Wj zj^r@%T*6&@-#_@AC@c(N7>mtmtzxjr)f%y?Erh#91@+u}GYVzwLp$Xccs)ffx9k1| zXoc|PALzYscZ`K;kpUHr25(k`EChKB`}?qZ zWvuu^gKz~zWuyaCeA&aHEARrIrW|-TS2Ql)9ad=~NkiNT zIW0YV3yNV!A{9i%!E%2QM}sF|x@AF$phXLbFaLNz3nl+t6{~3@jiu5Cmin=QANUwPG5({4TLUO)& 
zmzNgxm)q!8p#={HqU~ezk!=(7guq0S4pV^Pv@D`BiCC)WAAVRsEGqSgSY?a2ji9X) zn3^k1#ehPOrKNlStmZ&*dwP!}ivddDiT;?ox^SG89(r?-+uYPq_)2s?x zeps_>0-)E6V)XUcP;Ls}_1P6*I`L5PFH8)u-cs;WD8h6b=8GJCXf$Z8(4d#E8v67c zMA6xuPuruUS1Z@5jscN01UGr$*ydHV?xB&(3o-ny^1cGQ@3i1#cj3*S1(V8n9tvSx z4Bf5|Q^&hO+yD<)BqgTxh5ND$%2Fdo;9E2Ku2J9J5O6{x5=Y` zyxARWq>%L&s!`G!r$+rJoZM(YNly5(Wd&BN6h71P6VJKA%MRZqd5ypu zK@;||CR_w{`|j%Iq!9RVdUem-U5f#0&0=h+@xk0A@87&EcqME@y1%nd{V1W%DKPuM zG?|alPr1Tm$r>f~$Bu+PRr}q)b6rr=0*nz!PtK*|9vifMcdJja?5~M!%-@bRii1x+-P2K#-GAQ-Kob0muVi>Olgy zcV{w)AM5FS4+;N+M@Pc^w~@U3TovVK(mDE;v+~8Z6#d_N zNyJyyt>iJP)*=^~1F^ODmeMri)UI8=zX#P~PuKr>(vPdgOwM3ZPw_bM>D5mqk` zA{Ll>A7hwASiu8gaKX^}tn^}Lo)OgO*jeFh%b|8#(KzOh-t|PwR@i@b!{`Q$sXrq6 zL%i)U86#UG4X%-_&({1~bXh>47A7(5xXY#|9*I9 zDC@<|)rd7Rm4&m^GocxS#ZpREyVzSU6>OyAZT#^Prz}DVp=(0DZ7kLYK6ZnbC*yZY zCcz8gw7gz8eJq15nTeHMS|D#3rT8O3^JVSv8%nteDZw3?7?&lOE>VHHru3fApx$se z4Go@_B$w0uX0*9Vv0`Rw7-FeomLkN&Lf}DZb)|bO2CC==~^*FN+t%M1adr@`dsiQB|hLr7o>v?S(yno<~jevPc!b4qRPVUrgTulNbS$@0X4 zPO4r?p~HlRMx=tz)kV4h=utSq3zjT`>-R{u0t%dOSVW_hlXLvOoFF8Ty|E`^F$%c` zb-S%JrO+S?aH{mXD~Bt?Vkb3AMC27(F^p`6fBzmsMYdM+prY3hh)i9u)J00UrIE5b zRF-QK^sYE*VUz6H@0n+@l2n5pC_`z!F&3qk!?HIWw%3vzU&p#CV4+Smj|;-$!z7`8 zF+y|#PdJYzLg%g+Tg_d9n_Ft z8;A#W{X(+?*z$@|%tPs%Z1mq^E>eV$d#enGOqMX*HZkD_P%d;oZ+ClA ztXk>4p3|kG(9CZC1vXBM3l23PK9Zt);d(}yn8cJ^OtG9*fY+#+iu6FfbdzItsBh1e z?oahao?rca4!fG?;Nsf3LPXzlspp^vU|yYbW2$RJ$}k*`u!r@K?jt>qFgbjrz2SHh@EU@v-gw!>mQlc-i`LYQy^6D zU)h2lHIWom1+B7AR`mxCn&(SzB<_7^9;|D5`ZSc%DsBRm(>a4>HDC@`yIs}`N=66X z_z#uvItq9@7`fmUiY~*oVy9FHU3`cac$*~Rv#5wMe8Fj~`7SCQd-J{g1K{Bt--5?|m?5O%^!7O+(QNSW|T{%Dlvm$F2rWEklxg66E$7+ZZ zqcny*)l;S>WAp`F-`s&5%`{3vt9oo_Ao!`^CCgqz znNyn7M;am$b{c{&>GV)ZLuK~petu^$eOUHJ-gB$eW)&{p!Tjta=u;7>N#-rYAV@vO z4P$fKL!XNlwcwA6r~>-4vM(+&0XH8Dc|PnPBGl<<`P*s4#JXIKI!1iTrepXV80RVf zcCd?UNNNCCb&EzRu^uph%|AH0Tw7>V@@nn(g+Nb$X>K^m({^ZT;=UDEz*o_2fK0(g z8evymTB7*D+o%1w#yUb_3W^biRo^XA#1t0a{KsmXVT2|S;1AV6it$0bwR*3)4gBr_ z(NFjuOAK#SOWVKLR`b5U$^RB))F^Gq{LgleP+1E6u7PH>v!i|^Y#6NahN 
zCrr9^kNH|NlmB)X-^~`ujW%M^oJcZHf)J-8S%k(XY0UD|2<7`)JP)EqBa@&a^9FDw zmonXYzCl)93p4#C4dHJ72}73ftXD5wqeV;B^UZN#{>S3ujbBUSj~v`D@`>4}a-c?x zCxD@a1K7a6B$^=EAp|`VNC=9KxW-z|2c9SRJsoyw!0b83Cl9A#uD^JoW|i~sJfKF1 zwP_Ljasja?Se~6JGL61$KxMrF!`Hkgoy$Vr){g=rP+*{xHUa)_tH6aw$4?3@P*w=ypN>2Grc}j~k zh9C{n&0^(a)~%UobBPjm%t{XJSS`G}W3i`0xisM&O3Y|J?um-6tz<`yqE5Ly7~S!zV1S_M6K_G#q6m?k#bhW(K*LA3mwhgzba5Wx4+&EKWP0O# zWJ?0Cr}?7cmM53PEE;x`FII2|Nz0h8I_==)LcMA8N1d)@JKH5=mCT|!>4_c!(p2j!8!K7SBaH+x~!0D}5wNN0E5Dv4$(E3So&}b{I z{t#r4yKaA`08?*yxP*hun~uORtNSs6(9=dSh(=2nKME$YGIbeXI~C(vHDNeB)=A98 z=%61kFxP0L1lzuvaQYX?89gR2%&*?`>Vp_gszEPH$=dKHyvFFjHkRD2|C{$(y>5bc z3}rU|l_Ahu->_=#;eD z9OU2M;LWxi80p^oEF`qzEgPNuPtX})Xc5VAEwqllaXl(gdp`>xDPEMe;&N@Ob2_xJ z`ik;|e6tlz0*=!9-7}4^UJ}p~<*nomiPBhBF080kXn~2Rlx-=}pTtL#P<{M0I>|M0 zt{YV|pQgwTvk@47Q*kKey6lP=0DfzuY#~&FWUt`lNlxUMb(AN&4Ho_6d@h5|izu%`+4Hy%<#qB!>e!+l52e~i zDPNZq|(Un8rINHP*ghFym~0`U>#mofCEL^|EEm9-x3f>hqI z-Jd4=c7*Sp=2>psWLQPz@W`?RkN`!B4uosrtwo5a`1_l2JnqDrdz1dW8bfQsI7bcY?vv33F>9=3fTG{-BQ-#qS4_ohwCtt>(OP57o@18!=)$SomU&BrR`EvS`~-BYX7jZS_w=hNc;MBr1qveBTEi2$6U=G zI<^@ksCBv#Em3vvmUBGfglTw%;$FyRp(^Y#nrtg5z@;}2{f{O0_!)p~leB8S*tEG5n3mMS`hGY#O_m!-* zOYoDW_(BvN5d#k!d?yGea-n&Wp0v!O(mWWwowyr7+ka6yG>{1dqKFfaExlJ#I)c>> zIh3YEhnh+=qD! zP52MS(rKl>vsz|pO6LpA9CvMM)6&493(5iy#=}W~7^JAlcH!0Jv0-|kUgG9cgwiu? 
zPKvisO72RKFX2eO$@M%G^e@tNN*bMI0Y@?mF*)&=TWpdvGaJc=p|1T{*{gyUPo@JP z>OD9|AtL^X2mK}p!D%?H*fHT=(acPC!2Vq#7b^gD;+jy2)kq&qzR3}b!UWT`3WK|i z7aFqzyD^cS{Zf{S-&6lj!AsrF&CtZ(w&S{G62cYK(Kn|MZ%EZRX9;+eHr1YPnyh?q zJlmLs=s8PF$c3lG36y=ccSte3$VM-BtreAsCi@>*S7q43-!8qt7!!^4YW}XfW@>=u zl=b9Z%4c&68$J49s<+2EtPykgtAQe9`BuYcc_{LR-YI*&m<`rV#mt)Mh}?Aw;^aZUrz9 znG8lby);AgkIM|nOD`kf#WrJfs@#>ELheoMbT`|6V1od zrZF*)D(ZQK5VzDcIcB=0==wm!f-+c>i{KjU=J_etVLdY??$!_a?!knNc@9jV6!+2< zqEO1!I@sO%4XHVuDuk;s7QTEMsQE74a7pQi7}+E?QnIM(nDouFef^_@R%VF&hH(J0 zSxuzlA@8()FqNG6FToD3sKcnik1fv^sewzS<+$!gK@0sex*zBRqrEY7P zASc4YBsu{KDYx>v1u~u}3fCaocRM79G@;2g$2{LnoOx3M;OJA)vkM(unU|sTw%LXU zt0yW;1xd5^`GyxpJ37xf1#B=p&8pHK%GFq5bmR+5Zw!wKx>84y))kHqJ|O6S%pV!y zz0fj~s99X_Wdjfvmj*udoRx)X@c|uSq38&P5sDIGLuw*NRb6t8PAi&RP98Yu))l0* zzIe3DomtcumpW4?Em}55dvzh{Dz2A@`T_Nr85pSw7Jq6=iXLeNhlW+162kp>Q{48$ z*o6H>|1bOH(1+nSU*Xo&mE+A!8l2^Vjm(b?+*NNEy2vW-Ap@gMBt!=;m46yw7#BS< zS|+9PMI$^Bw3&-rO@{9C>T_d|LXa`ik<*Gq_mR6eoCkM9cn>bnQu^&iCtx~TEE)~x zP$XCAdZToaRUm29XXHGkc-+^Eft^^;bU|x9j&hML_hJj#uur3l0i@pF4hGzt1nUQo zIuPAkB=L^o`b7bCn0JYLO@F=*!MkygQK04_nE(L1fs9{5n*&GCXZv_0&9iL~5g1=? 
zZkzds+??KDNd?!`{w`Q(f2cb8-Gx$P2;(dIXdyMf_A+>LmHjD;cTS^c9uy4JT)Zgt0v?whN*A7QN(_+>ni zh6|@v5{NW12km06Ra!xt!h!1R0cSPR_$D$f6TU)q9T#uVT1t9 zUzr{b^dw)=No)JW@*~?d}t3n*b=fzCD70 z-r@G>ccS0?NZMs^?mm4V>{&=NSo&Rqc^9&i{U7^#auzdo^%C%os$p_r+ss{>#symz z37X%{g`&1WU~z%)_4K-D9jUoh#ON_57cS5q^`J15Rv_B}&EGzJmTA0H-`(-v){Zco zri85?rA9~5)T{E&w3D16{K#Q4k%89ekgc99c!8Kb8xagF=pR`PM0?~bHjdbyVFdH) zz{>`axpzDl1cx~+w}=AUE)Fcm^UL#hS}H#}w!7uE$?`MO8PafK<+BF%zHg{^?x5T= zQV%*2Ig}B~$$KMM-_526JSxhQi1(#ry?7>iC2?e|mmjmnk~IjB8miFBfAl-TLCX;t zVG+Yg5_tiWOIC@vjmFssbqQFzWw|5v1QdjbH;dHY#7-b6Hag3AUQY|3BajG|QV$nA z^>MdRQT@3r#Fr+lOY#R?@cgB!++j%F>*2=j_9(e~ec{sgIPiC{Lu2wMXADMe*UvMG zYb$)PwfCTrn1 zLVn>&Zu@_gQ8G^^M2}4TC%v0-{Fyt#0tVZ$LQ*}Mu53{v6*`I#f<$qp?4hB}f>B(w zAC}tI6oIwD8KjuDS_&2+tOSlRqsAplGGb6E6w$t!#y}Ky(^2iQ@IgI3W=cSJr?IZY zT(sAM^gn>>ugjnnrb)G#VN6R8P}%Aze4Og9Rd~4pZG8wBH1COJ8QXYF4i%mk$W59d zk!Iaz8NG$~B^OfVn0pSyVs+d!wQaJNS#2P-k`mz&K>G$Z3jObPq-+}OF4b};E)F~s z>Ey8?vdmFcEU6gK%q#@9j1+uL64Ujw`IU98WO@6M0DER(ScJd8amsQ*ndgYi)E_mT zS8N;HMiFCKUA#a*5``8T!avNiNoX!D&PwCXdM4rENUh%7#k!zScUSNsARqD#k^68t z*o1`9@2@8W<%pkyJa`}5XEn|_Kzaj)6|vXa>91PjXM$=|I5E1Ap!&h!%s4Cb(nm0l z0u&iOsywj?LsM{hRCmL_vjgoAe_-j~^oo1d{2w%Dr`2Jb(}d0xFx5nPARJ4L!8ZCNmuE&~SQ0u!Vd4On zBh!D7`OJH2YSVusDlR{@g>p%(eBDeRs8td&mr)d)U>+-L-f0ZNLU9{Z9|YrHE1Efz zn~rP|_`ycpoBt~~Iut8>sEZv3qHv63|Mb$;E5Rd?h*=CjH`!woIm=$?FD5H!aVULw;YWq6$VEj`WypDW~{g>Ot!GN_v ztp|q|w_T{??Bo*=)0}-KZrAwhM`PQMvF4ze0#A_xn$kD#bdNBzOcSk?un-7<gd zb^Wdi7lHP3#c)QXC5l~qdM;+t2|@sd=$T<^@gF4zzZy1xJt!lSh1#si^^NeX z>hvnuV}LUnRzA6 zvZ0LM`*m7Zj5ryMr2vs@S1Db4`<2)pvfymN>g=yhUWa^uWco^w2J29AStu2>z4p^I z%4Vc2t{2EVq!P)UBON2q->6#kO>3++Swz+m`>V^jGAY#oM940CCzio{yG(1G!!>}j zTdWOj61~)6@CKTl<+ForA06-mWBN~;g34aHyD|naJWEQImdyyFmVw|#`?J8Bh`;uv ziM}f6Q0xH`p$?p&Og4+s?C$-Mdl>%=de^A83jtt-s|7`7o`=R<$pq``w31b??+VWX z%$VTmrbIv@aw^6d)~U~f*Pl2!Mx{e>#hT=6_*2(J!j@o|UxY|zY@o_Qj9Xg>gIVtN z%{URvvxy1`8@kjGa+|DUscyBAp=2owwwOOF|2YPRIR~%#S~muhnK_FBJ7)dzuNzYh zeb}5akf2~vJ5VI}VX%?1$#~`N 
zDPf30RmU|G@(MCyLJirP^#{5kjA7p8gZU?@L5V+1PG;}{5h|P=?UJwc6h+G5s6}kK zo!0xgV+I0XWtX?-KMo>p0I9So*oNG^UN^gj5xrE`uuD8;@65_2Fr=hNsMoD!LAD3* zr~>B3RuY(XESj0s1yb0eNT@sHP)c&(lWwuFcGw6iRTq9xrp8q$=jssFaF)qgjUzq& zsRW>Z6yqt<^&Yl3(O@`>E?_X0YcFKI9xtmt7QK)*;27w|yktp^^r2(m5R1POUXpYG zuxhz)cwEj)^65l_pE(PE8HDRtUH@o;Y&Xw*>r-oM)CTKaYn8^91Bc3V5>;i$IE7qc zq}@G@a&yh{Tq=~d2B+Y^5JezlD}s6gVhxBs3J)utzN+;!2!`LAW^qIq#F=p7&XE^u zufS42u0$=Y^1>;zO*_v=`LxfV8j}N(D+$l&bFIG7U8hhw5dJuq`z6Cn;KsKfG>zcf^iN8V%A@Z91RqhPjT;cu}3UzXUf%te* zkeRa-9Zn?u`z*zJdyQ;yPjgMOc|9@!FH(4iBe3+c^PipciPX0afcHgjrJpg6~LT)Z&%Vfwx%r~Nf?(THAdiOl%duQH8s|q zG#&-YcR{~{N1Q7jG}-t4N3uU4L--;XlKrkQ5dh{MSIM?{8pTAC9|6eMeelP)J@uj9A!7PEb?h|QHB4s|0kpPqcrTNP(Wsyt)L#!BA3ezI z7Wz49QV8@E0wN>tMYH(;vG;!KQlksZN{0$laY`=!PIQthOuvhS#Q^;?u@u6T21BFK zY}b$siyqsOd@>HyL%{0rS*%diw#*X9Fs|!_63GRR)eW9%)@b^Fx+ZjCLk4TC%A=zr zZggmsvey4gtXC7iUxZx%Wfv|w{TVz{=w@pe{#Itga##KNpVjF4=HD~a_+5L$R~Svt z*+C}Zy(c)%rqqKveCnDAnKI7Wj70Ib1D4VDi^ybad5!Aq=u0X;a=So3^bIW;If zhlHMizp4ilsS{3jCWbO8p$G;-jv{fQ?S=y#ScvRN+?-BK_;X ztp$_npUQb~S8Bb^$H>+Q-vbX~b{9&Zriqi3e+!XaF>6z63K!alS{&>|&m1=%cb^y_ zMg@+Un}{%%S}zeJG+*vx?f}IX1L-Fze&h=&&69=&$0V{U-ZKO3UYH;S}Fce9v8H+FqnO4oCLPD$u@g z)E5bZT6rslfYygp@lbrme7iokCJH)ncxY3-Cg?G5^%}>kyZmRV-NAw!uxXV||lp36j5zfJ2Sujw7no6fJn>f4T z9Z;(Tq;CY`=ms1qlc#D6b4FUDQD%NY>W7W*AQ+Tx(c6p-U50IftUv%XVP@lv36_y1 zG*UYZZ!#5Xc(NsffJ@d^J2Wi_XEB%i7fs}`kbFP_j_=)@+2r-9{RGfHCa=C_4#H=w zrs4JjVzoH}T9kRm34?q^9CI76AyT}V;N;#BjdH);6|0nC zQA;+kL?N0~29&0;06p&j_Q}$>q79d9>*_i{ik*@`w@Yfon^D5FfKjH9Yzs3yCZW|; zToOxe>eim?)TTy7_N9%GC_{J%{k;zVV3z}|wt#X_L--8D?5I9s&Y>wqC=xcLAMi7> zw0`|;zkaRtV|W^vM!2)eZLxOsG#zB~Ci3tEJSwW6jNiuXB4gi5Ne$T2gUM5Afiw>d z6Z&#w6%}cWg%A%Uvu8g<-;SoBbvm>+J5S)fk5rYUrq{L?5gW)=OK9@h0d$?Jt#Xd2 zlGou7qjKi^o;ELg-e-AcQm8qBqfUZcT2;k!BP{erVAdkD(LjTMQRHTB6V!rrJ(liP zYNr%sU^4LAlZ1f=`W{aH!YK>8QGxXv!{tmN75_+LI#Qk^&V|pP1wf&L&gZl z^=EDEak~Qmka}un@h`dzYwGKd%V>zMvAbs!g@~#T*b*e6=+;WCmoBHqabIwOVEwDs z7J$uPWx-N-i&NS`MJ=r7&SV*!=O5$0j*|1H8xowLo@^i_2;e6N;Le?W;6y(C`c^2= 
zSAN_iWpC!sQXQKRM8TM7D6)i+$`%;K$VRrbrt$=N;26T@Y2>5ECuW$8LW_vQ++evG zhRYo%aX8>0)`GkKVFVf72-0O+NTAq`!$`A%Au){{2=Lp4hTGlHO@|$4J$*kp``)ld zJ2U&8cpU_gT1GUnoL-k#xwv@?dABimg^?r~gty?8Do=z+;^_mVCy|?LY}Kx_Ph$y< z$ToD@w=n2V8HSzc>?PKqnl%khcK<2EKpi&r*E-E5A=Dq*Y|-v|p?f{og|t01x=jTm zuRe|tUw?1L3zRW(CuUVB3Oa^yr}pBQZ`@Ot!9?;t<661=ON)2@58$#023+A8tQ;zB z2HDTvFDJEy3KYJzf5U4$mT{p`L;m2DD`1~T;_{4-BkDzH%y zA^r1CyoG5C0$7g$&{eo4U={=OYLC|_BoLG-5+C)WAk-Kiv4!Co<%fS@Q;jqm=Qa-7`vaB|9{yh#xDwPLOP}ttHjD~hV zidRV$1-}xoM|fF}8ckxdlv~!mU&E;tSEv-0X8?C78$5b`MHP8Vs1V5P+I0alpfEbn zJ7lfQPI%sj{A%s2v8{|E@%8g0b+VLmLkZiMMV!z&)`!c@U-5+iQybvA*Ri%a!xWvP z3`(O{e)9WEIIu!t&ZyIh^BjaKWa!EzDGxWBl1CRNp}?&pitu@ZURCN5XfImFq<|}t zb#BgYirpH#8g98>M29vvyJ2J$=L z1#lA!L?lK~PU?;wM|*wak#s9M#f|p8N2nrNJa9cW*@W&kx)@UIN-Wtd+*CLmTO=6& zzpK|KuT`gBR(J{$=BiF;f1YiR*v+y~pw1N%Kz>(-(PP&jnsPsKjlWzS27@8#Twp`~ zF|bL7Ggr|ntB29fAg!5#3+(rhfm)m)<3IyTN}|pZsQhy7Onz2AVOPdo_6@S+s_91g z3>&Xn^JafHMJHf~7?a;9jd(3yMOp<88tV=T#HS|)8e75IrZ^o$I{f_JNC&l7d`m>e1J!N+$h2YW)F6 zBGti_E-&5LW58hi^`5?> zULUPKs8klYODn_H-A}E`nA@*%eMz_@sjwVau{+5-W92U|AdNFAMl9-v=e)RFY2}<+ z90$X{^pRMCY_akt;wzBNSNX`okb#EREENhOIVyF?!4)v7d#fv1-bEe6w}t+`5?s$b zAZFH%uP%}@zcat|Ln8~g3nLt>x~5IOAkft16Q2`acNum>-fH6-T zLUS0!1k7rBvVQUhGN3Q&@8WzQ3RmVg`K8{4+9$)Y+xR=(L(Tnuah)P z@t#3pL@0IK7EB(*nJtqI%ohmi5|in?IFSc7EvB?%14Dv0TTty9(j$^VGYIl(;m?sb ziLE+$Vd<>1H!=2#UfGJN+;<1Jsg~@YLtZd@Vm~72z58f4(!}%Aoz9Flro~yVcToO3 zw%8gT=e&lq6|s<$hT%J3^Omk0G$H~Hzy_x%E~~m9 zcCYF3htcF_`NHS0(z#9-s0#VT*#-tk}8VqWy*h4Fx+k-~90VR);#8o%SM6N<> z1^%plZf4`j|1VN^R8t{W+mi8OFPdNlgP?nmmb=`oB4$R1Pry5Lw09d8-?SQ^v~-4s zw3MJ{y{@JA&9T;_wv%i~2d%22&aI81(Ka-hZ7 zH?vqBd~0aK0#!dAC~0gIQgOw3wO2Xj7dJi>p`En&GaV%_Q;u zT<-Lc#g~Ar4eQLVlV}W=YgT}BzMf$5-$!*VO^w7F5`#I*L>ZKO&~9`;Oy;M~Psvd3l2|4-P_)ACQYkfU@I&Xihj?0^j-YKsrZl7DZ|-kmqLU zYp+?98vg)PiR{9aV)=}mH8Gn*o0wv+Q>&P2=#&CuC~^8P2RL^pt6nbM4GzRM>m_oh zWI@l2B$dOO=^L$o=DO{CW8BnnJ)A7f^<-V-gzr z0QYjGShVgzrZ9Q`=#fXB^bcsZ7pcq_)#Y)<|3V8Bp0NP9bHK?fPjE;#f}=-E3G;MI zTz?~-N|Ipf;lFq@S5j{p<7Uw^PX9<`HEj>XEqK;u-5iPMV?U>(KC>lV*^b>%eZ;+L 
zNHiOz1JZutraR^tgPQNAyUdgZy;dt5V^*pV?=)tRTgVaOgb z^&Xzmni(dQJ}H?EsXMUA>yT@+Sx#0dZ)}CJ*3)9@Q)RJ#tQ-*Om{#2_Zg4tTWa+!o zShpk{$#@;~#LGsO8f|_>h*70+ioyx*#W_tX%NVN-I|kbF+m6|jT%`%kfhMHtfG36*kNs9MUiwR>TwCt4g5m|;Cx5$O3ks(OenzXVjOZnL70=fF`yd0E3 ze+yrLM;o0@c|72ZcnLAkK%?(PMEQUD0zq#I)ozL3B75{6dk#Gac3{M_V*Nqx0Z8XV zar700h2RMkjIqO#Z>(proepCD_Kt&uWtC~h0|%-bYphG()L*_Q>%$nz?kok0iZX>X zfAGLT5Lp}G%BMPxE>s*TBZPOG$w{9{wj_$wudV?)?T5MMMQYXB_3~)8>UI63VIA69 zf-tv-^$W}2k*80}l!j$uc0w&^j?;55ClP3Jj*<@(G4n6=RSYT!c+<7VGq`=uC7ay0 zvJgl%X%4p=ZhfT}z78~WyIE&H7Ep>N(h92*oUgnr(jM{b6OfHeQi@xuA}EOJ5{y#! zmOR)|eMk(0jqYpBKgH+~cL1bl@^BUk0I#VKngo5#^RIyUj58xV5;haI8fYMGc0?4(?xwVV0f)!*?tYJiSz=6ea-zSMB5h4I(K6+ug4saVS`khldcqtIxXRzA}s96d;%g#=7p zuaaUI6#OPQmi#9eY(eDpVzC}3IxeQ2P>mxWa*7B{uy=%Ia`%9X4j96?{NEa=w-G(F zLCKUDA!LJ-G}3_JDI&E74S3MpfXhY>a@Dvm{FH|2-IHyAk;4RD1N0d{?~n=)FW9c* z4~yAA@t%Yv{SVwVEoE-tpnN^u8~BJhQUNcBG64i?P_5|q)BVHArhgnc<-rS71+?>N zB~u=U+!fLBy=r>~y|V8~TlW=m76O#eQ1mO&#AMrXLI2Bm8?+;#?wTxRZz{BCbQgDg zM@M)ohr>$Tr{z94@kWVyF>M;yEC;kz5sPrH2FHXz4E_34m?k0U;YY^W{UFp01#ZQn zK*L8s?->dzBnEe9Ir3ZTft zV4;01T6%D4pcWU80q|1}=#|<-lMYkfSI#hE>-}yt0_FwB9!3-~;NKhOrd!5AQygxd8RnTs}!o+qCekDIrh)KxX{|f-P7& zScjXGnlpD-fNezhncLX<$lN#7DXP~!0RrrF-UG{Q@ezAwf#6CPRWl_U!PD%ad3`W> zS#b)+SUym8nl?}aVbB!j48i;*wTVp@mNNPG-OWM4l(A(C`tN}kwf?-$%`go6jPc#O zf@uB@Iy-7FMR41C_(H^sHh%IIBG)IW^&mi1Vk4@D2aHlY)*yFMv*l{b_tW$S79lUY z6F3DIB>S5fIN0^lD+(4fT##k}sqP!M!ndTR-}Q2kKL%s*V^>` zK-AKavZP+gP&R$Tv6asRp+6@#ZpmN>C_kPDZ(8;K7mtpIMjre!~ zWp{Q=c}f5xvEph}o^(ACXUk-5;=dewYd3h4YHn2>3t)`JjIvOB8v~^}pc8RA__)h}u%0I>2#x|z)Rq?p#A!eott^lYHz zT`zS*hqw`lDbWZzxU;tI|KOd)8N3j>x{k#RbH{Kju@CjD=yFpnCtrhHxV(heSxV-0 zfQ)48izRX3i)!~e3GK;RCW?jJ31+h)Jnm z36!Ir1aN35-o*M1J>@HL!$Fsoa;c=LVIcbw^c8q)10D=yEt?!knr;x=f9|#R6*X#> zst~A6;9w`hnrcqY6;FH+9$_01VndzwQOX?|8(GNvj1}_$_Oa8KO{O7CKWB`Vte}zb zbKwQ20&EEL?SKqd$hF_63T)fQT_%fLne9?kY!(`@GiNB}RVl$cO8lyoXGhD#70*9w zf2P&Op_al@@PO2BAx!)0+khzTdw-4lz4cUHu9(87kkz%|2;Nc?)5X`165i!sOr#0^ z%OL-_7^cu5bPy&(41sfVhTdH~e!H{xVTX4hZQ}qF 
zB&1rIcd`+BeX^0xM2E1HIMoZqD6swP=xv+GjjdwhR8D7xOy9WDq9C3oAmqlX&Z|Zd zHf+e21KG}~y*FIUovx=^HG>~*%d$}hvf9A(&~}}TBzCnCPA6I$eF$l>3>?*)MsXGZ zODLxi6}MdV(I`3sW5?uFgR#6@yQ?TE8jzO1E=*d@3Kao3a?($uNp1md=<-%5A1zC? z`M$kg>`BeICB@9Z)m6;$Od&!VtijS|Y1OgRh$n+=i!JRn;K2X7+zoC$K7nJ&(O0ZM=JeCUg;Q+(YjFr%wUNmK@!1=%d_e+!OlZMTpU zA+2A!8&*-0wc%#qD&=6COoKj;>zAhVOBgyd<%l+F4r?S8iKR@jRvV~SXYt{@V$q^> zUxx4&B*X(NF*!~NCQj(oSGrwBvS8I|UVv05Xb7>OS~aNLvP}H(no+LHFIDYmEZ#^P zHByMjaX#W1BP(vd|PO+f2au)3r(f1uu;{|xQMSTlq@WrT6r zidTWD>eIgjokmXAS*PbOQ}s)1d)0^UYv*Mp-4gMgDH3}k$~c-rhV2wDNbaUawmyW;?bJK;C%_EQ27n=3H+G;()HNI0Y3hiP;cS+eQZqymC*9i4*4POmO?8wE4aNVAccD25>X}eIKwn~+F#kFHPo>cPd_bqb;jFe zb%X?I)2n5Vjo=Z&MF$%Yz1g~6f9B}tR1|eDJGkSp9Qe;y8sG-zSRcQ9Uy`&{=IRe` z`;VYLAoxA#G$HdEJ1hB$pWw3O(aNu7E2qXv`28+TM{XeTW;XQ}tSysTOuKgbX3GX@ zh>g^w*#EB{LcMeQGTRe@VnF}FS$xZH7SiJ{DHV5isxFTs{S%j^(z0hl z9f?&p)m589&YP)x^3CM%6tVFto;iF=ECUC8QQ}7qw>RXv1S@vNQXFc7 zvngLKkCN-1xY+}n(jY#6tJviTSkfa=WvERzyBaz{9i z4UP0jz_NHdI{6R!SHl>NqvSJpyodOI`1NZZVBHqNs0Ds(xWFtl7tN+uO9QZnIeE*I zr%}+Pb+ns@wZ<%jmCnfC+)y9J0;v<-!734nWCDb|OcMgug(qIUKZE*B%T08qfaL{v zx~nh!CuC9Sz&r1z{N&UK!Epvh1+yg37taQ)Vj5JI)zi=m3pQVr3m7qgK2TuPgle~| z5ELZuSi!SvE5NZOoGH#61#8SUV`Ou^?wlnlhTNedj95G>u07hX(N0YONp&k0yJO;essxC z8Z6L80=sRSL~HNVZ>2etxTx|C@XYn#JNW2%fXvKHB0M%u%P6L1kfkOM69=#wJ{gi2 z?wSkK(L|?c&@0+=ckbS|LvK!HQH<8@jXML{wl7@>REFo@JNdBX{`h_Rv*w-eKY6Pl+kZSbq`d>X60d2Q+6=p22cMt?D5X)xJfj9y(aVpsyVgz$SUK>UMtwYc` zZQ-+8gbhMZd(bmEwo6n&mnA|zR#$vor6eSxw zU+2?=W}?_wR;Q_l(V}zc9AkvqbKabDtNnM?{F~t#^2pZTAPr+Seo$L|_&q-z4(QX~ zx2C!8*wWMM-kQBZGQ(JJ>K!v4NfCr5wE=AyosUL-8uST`q%w^5oS>JQlM~Rsn|HVF zrltz+=Ex6Yw(ZZXs!Dz4h(_Cw?_T14hnsc}2#@tf2 zWsKmJmD?c!?Qy3$o@l@Gx_p}}scF59%?_uM6^*wGXv?4`sFEJt_D$TbWQQ|74Q>JL zZC|p3$&RMEZHz%7(O!FE76FqAS()a_aHuUS+0lS9i~)_Q3Gzs+t!_fH!;qKdshVnU z=(;4;qIL{zs#(#Xw!~fq!>NY=n^f!&aGJ-?Komg%mWI)3y{W8Bv8CYxBb6$*n1PlVEw$MRY(ITX%Xl#eW?h>0um811wtjs{4?(8@ZC^iqUCGWwiyO=8FdD+w1QYGOcCQ)Fxt8u) zwrc`kcFmY=L(-#Z>Zw|EEgQT*6YaUansd!)|9oZst)KZr9@+MJ@(@%V*`*hvakt;r!~frHL{#k 
zXk%%bM%E}pm6f#^K@~5CGS@WGTx83`h_53jW@cXPA%nLuhP)gC52LcMYId4JcGM&} zEkbSxH4kcjVoS}=2l}>m2>Q+Dd@svGO`OnmO#<5YdT*@e)woyoLhYRt zK_`})tXf|h00j;-rW3}Z4bqE+&Q(jC)CZIuPcw~DV z%4DoebG))5(|)&-6V`k^t(P~$mcuBZDaE)#n-+KBPPDf>n_I@LB=x${D>kilr%O(R z*u#*zs;Wy_)@N!e>m#v=YEhRWkZ4fbQ;p5(=CULhaXroBb|zZK4g)$FvNB0Emhemy zZR1i>;h>fkIWjwtu9%?=(L4PQL=)u zX$se2Ey88EBcMUJ@4>yAgR7~g>$91cB%uA<=Aro(<5xi2w;qOe)3-8ISy+p2AHuhc z_dc8E-hlRQCpJ|yXK*Y)W49{N6pL=FN@_G70!@Q%v-aFNx0J**?FsyKBWIGAru8#M zwSC>lc74@kREzHrlMTMM_ey|uF{X`0KxkI(6~1doR=5vn&8`$BE3CGjnP}=VdV}uq zxyI)FGeKB;4ujo2(?shzQGmv+97d;YV@>;QQ?%C#>bPde>p3JpjF5GGnzJ@bFtAx& zGE>P7Pi)8YWorIFO!5HRM%+?R7*$7hnp=IZ>R7*OqL`c3I?-5FF(qJD)lAnM0J=0SsuEyw(nwF1%`4NO zYOBWfH0SAPqGA(P)tV8Sm`7fXH|lCsjZv?eH5j#m5#fu?MF=UVo6SVY<``pAa&yhb z8bf`dOM2Q?O(8W~gi1lE(qpv;RaSVKgAA(*xoJPEDXVDi>8JdZ@w`tZnm&~TVDTw- zS({m5T@60<6wuz&3HQ|bduxJ>rm@qH^;xGIY@(={X&Rkc8AdfTpvx%%?Kx$9nsfRD zvzjQ4e{2tJs!7vYHvZW8OiWG$Ob-Lp^gXikY4Kx*fT0|k@OR%=H1|(H^Gw6=$cs}= zRfX(et-Y~F=Joj0<0Uf`ErJ?!izBC}Sv9Xrdmd*dKLpOdqXdJaH#RG}W2ma+rt!6D z+nuHzr-^HRWQ#39K!cif?RAHMFC*7 zo|;Nh?eWwcM*?nNY+@O6PG~$-V_r?$jYLzc#jR*?J?&u=g;kPbHn+2~GK}DDuWM|r zj7}Dsld(xuIwe#ttJiF9`yggHu*=+O%)J=@%zI2KFL>25b5v1w%c)SU`jCz37>5Kzm2Q z=(vkn6LcC^nzk+yz^5*{z2-zA324#935|7SDjIY#)1Hg-k|>}#7bivVRr7}wt)KZB zp-Ej+2W$Hi9fo?-hm01Vf}+8vCaeJMeF2sRYwol5(`K3-ciGxI76FYn432E8sU~%3 zO~{R>MTc27eF(6PWmPcZtL7yq8gv?R4yZYooat%b9At&MA>%1`$Rpdf9MH6z!@wHs z4yT$h21Ta5Zmyp-)%;)#%p*6=31E1#Ool32Gh{2Isbtmmv3^=)$(a-k{VGIFjYU9% zj7_x1j0(q`iS`Sr>0)#;>#CZs#@=k=G`zR<~k?Ez^K*=ofUfIMz&SRXsVE{sYQjpQ8-oNAh2VAeG&Y|`lDY$i1^T^7|Mgc^d024Q=Q zPIGYLl9c8j#F{swva(5SHk0Q;8M+t}oeXrfeefv*E1<;(+k5U^k^%dpYwoI9VKL@L zR*Q_LDVrFvd-X@j53Y0S!9Xo`ct6R66jQ;UDi43q<4dyf&omsMqLntO`&?qSRRt- zYUIUfabGsr9mYk`UO#TGCp1%sakb~>%=i${y3?2+Ma>S@mOGkCUKwS~B0$q3GpejX zeigH-XpcG6$>c@YkPO)D7tCPmG-mqf=} zP?YQtWEj}QoD~IVZwWfh?T7JMb5&>&&>AD4kc~HOD@jeWgEdvF#gL`ZcnCaPNR9F=TY7vGgN@AG? 
z;bT-5Mp>BDfT;H1qp&E(N6Ap1m_3bn7!&qr1h1Rln7_hgpM%c7dPS5-38VuS9v7m^-La}@1=n?HuNemPwktD}(~ zscHQD1hjpI1l9B-9MIx(8hipqXj0Uipr(A!@MM~MY=-6~(cb07=(vX_Wl3yWJ5Mxj zmDR*)%V_E?x`k~(o!Owv_MCtKMrc#X%hQ}wvct%PsL3>&h_ZtPI-9*Z+I-iNg%U|>ymR@v04V5oaq>}aYGRng#Hi~*bW zmH?}{#W2uCKv6TpsE{3uqWv{9vqIBcXC*0u4nrbq-65zfMn_%}YpgP3_-5K_bWc+c zV^-A+&5q`p)dk90WMG|a+JFdXP_1fSoAy*PIM>YPr#a=!>AIw*{md{tlJYd?RI)-c z;L8gnDNCZ776Yd2vIr<@>M$y6Hm)X$Ing%CO*K)|(`gzVV$&lDXmM6p)YV{|ArPk` z;4n6qC6TokD>%)?4uhgSjB`R#ndTQmooWp;3~W{w0hNVG0vZ!k8iu>%w1tpmQ&?S~ z45&qjNe%%4jQ&r9(CMVw<4jD~Y!1H8i3&|rSQ*(It17HbMe7R2bv0Hut4dA++D^?i zrw=23*7Q+OwAf)l_cSF2rHSHlL26o zB7lgJkmjz-dN4Chnm_2d8m$SKWagR!8f_okR0pz})HHbTnxrP178-o-IgWK{@8Ez+ zUYa{NC9NH-Fg7=m;#6`@im|cD3hQ{I<)uCb4{tae-o4?@i^5a@n>C{iPG9LIce;VLxH5W z6P27q(`9Mu(>48tBqb?UHC>kW{7FZ13k8tZLGzwAAZckmV4|kfxI*^Js>!(;P5V)D zB4C0v*pHG|W{R}e7N@c(pGi|q*Mi5oM3%IGuDKv9PzHiz(z+att&EJcW#!24ph4-W zWQfupYc`jh2F>w{XunT_K60V?J}YBsCh3r#kaV=&=?Ynz?wbi&Hc2eWlA3E;n%g%Z zn<^~r^~sKun(J##O#|d5HP$CEFlEyK1Q5w-LS~et$k2369qEw>QAS>ksFI<~G-*&f znmNGHo>J0dS*(jQtBC?x8P6;LG-*Ar0}p88P*ZB_k=1m8u8FXjkf@Rqj1wsG;)_pq zI1-Ka@NwXqJTjp*XkyCZRE@TvDPf{!hE+4`Xy!C&5WY)ed1gU`1;COM%-S>uL7#yX zqRXl#iof@sMOluNr}nTxiU4p)FgcO$E9@ zQj;@+K^+MLU7wJnIp#F9UkZye2VB#j!Yrxz24rcy)$~kIOuO^4BrRqHmeY2 zEm$0xu`Y5mRnv88(=#F3tR_$-qU3a0nFJN!(VPkl48Rg`<|c9+Nm!vv0Fl&8ndXvN z)sP^LyrgDIXr4_MNKytMLzV!tNddB?tZGD9Cz}avD~<9aKOl>m8wrYF)@6AtM{^x% ziK=U6*fcRS0~)3bWMw7-3R;#~AxW7y*W|EJU|`A(D06{0Q&u%VlA06Nr9s$d7k~6DmX)iD!IhA#3eqjf#EsBExY@(T2j>ZB5aw98&0XbEZ!lEp#Q4+&I zmn?{;S%EI8L4kqG+DHmYS|aMv9%a4OZTJ5I0|b<1Eo`hw&4R}N`~S1Ts{9KK{QC!S zt_f-G|DB+nKSWW_-}w`Kdq1b2`?p_UfIvIzfBpUZ-~0pv*oT^(g$DJ1bIFe{FyOIuY#if948g{l9Y=hE7vIHi5C57I zmL)yIj|N!y3SUYQz5zi8!guh!HNW5Y@V!WLbJE^DL1k5vk_g^oZ4?ORq(S%Jocn*j zu4&Nxy`r)l(Ez{y?*#BYLebv4z`y|S*QA*$?cUa1-L2>x3Ec$-2278;bARt{+&+M$ zn4n5-w+jrw_5m>M+1UxOW}g_lG{CYeI{^=v{Fr@x*D(b1V=WL=)_SC{HkaifARvea zXsP86p%cgf zlhPJ8g+5_fjsY%$2b+UFu{kUKyz%pjoIdhGfDI7+J&y*sro@yTjitHgl=A{C2SGc> 
zIGm}{*g1joij8GO2%z%;Tws8}Isb2~qfu!7{msAM`tVy|fWUowXHs+T_Rb`RAd_hV zf)>56t0e`9)1J4$0D&|hAntDts7&nUk{j9 zM+3aq1FUNLV2|crw|8AlREGnSl!nsWbtNk-?V4b+>vc^qKy*E2K$U0!%yKQIg@2x| zhyUDIol5IxzM4`9MSrI5jcmSM*}=}K0qjQO_Bi! zuK8hs0RnE`ZrV)HVl!;UG9Gr#)ij}@5VYs!+{~c~6EHE?H6?sq3IyxR%hrT|)4@O$ zf=Lj!!R5Ey(OjU8BN||tfIvB_610yVnSdip1x!nekzZti0RnjE zglvKAAWKU7*N^$Jw2uik)2y`k-~-7^La@o)<9F=1vf8ma{&*}$i;gkr9%faX0vhe+dZM{>Pt`h7uvDrp!IT#H3?@b{$}Hy2D38c1ySzI^q5*byVRu4fT~dp?0s{qD z4gvydQratKWtIhNU4lw(t`!*AToOaDx~hwLCQFOD0s{n4vFUUPn#b+9(EyF(VwiPB z+D6YLX&OCofb7M!G6HY71GI$;cLJ`!0KtRX9Blu#`P&74yKkxK(DXqhBtQlPp-EYm zV{$HQy0?}UA-HDc)-AhrOHtECE)Wnnx9uMbNrUE3(D+qgV1S$oRzFVE#6Xrodq2RY zk^{J`Cd#6;b`=<4Z|mBitMokAtdfoaoU6gNhPt`8tpT*LiUC@b1Xu%1RHMOutrv*m zTAwXSQV1}L(gB+FYRjsu*MzJCJRSg>nahGS$J&0?SE-OCwO$n%hz8gt*n}*p#Z|ca zRxby^yUq%$(pFVqK%>H^+MKqJidmIO^QZ)xN>UlkqHY`@wiuNlbcL=-(P%HKzyLwC zspJ3;o5P06kf53-C20%QvMR2*(Fm1NfdNQ>{ZoMfNy*Rv1hb#=)15CqtK5nA5B}8q0)c@?0A$Vd5H~ z{kyEHc}bQPxum15eiRrOAbx`%FR8tE-_cwRf@^j(*`y%tLsnKr15~tct|%}tfVGiL zy_zzQ0s{mX^!RDdd!wNYK#*;)NmX(dwCy;++(=A|y$M;=guuBuN{OH`$K|LT8!d8# z{1}@iraAtN0t3+ilGeL18el60SdIoaQq(jn?QL`0o}iUNP7Fa~-Kg=Y(^$#PWg)cp z7+c54abpx1z%VHh$ZK9%WjUe&LYqq(5L8x2dNh;-@P=Sc=AAHX5qQf^Zz%WGUe!ZKuIU&IS z({K4-JHJ1t42?&_-57Gd_PYn)cX?7!@bD2n=dm#k%meR>%h0)QKp+~}Ns-s64PPUS zzaD{LX(&T~*ssprKM06~1m=d!&;J`cI)bAD@fyZ{@z=j09~4A1wx!665H{>x2n7d- zhEl}eb1^Q|*L_bCQ2-t4KGpaDL5QgD_i{tWMPktB`aysD{C^iP36*w#z5OT!6@>o& z-T%kix)2V^W1$r3h7QKp3$?o!6eN@frHFIwe*3;->{yvN7JQ}1YK*Ju{(SXCR4A2> zPZW-EG+KxqD`QKT%ka`0j&I-4!6D z$Tx&;`@sbZ!bKOl=)140E+!OQocHR3Z_tnuo+10cbzm$C4yyNCx6UyyNDxZvKDh6A z7a|H2-pGz!{UMA74hWHkP@Q9MZFTSgLW1Gv4K>FR6d=SXMf5lNcI5cpenMli z5Fu3Nj^9w@WpZ%P3t>aJ@u`CfD~^a`7y9$^TZS%T5@CoCMsfgva0P4mJ!L93phBFh=;Ca}AUTM7w+H+7Rl$Gc+0w+)EL- z?(w#}=JtreLc(jfuib6r2Vr9YQm_!ZeccNoYfmZ?NOT^*qxQRgs30K}v`Z0osLwS= z=S~$692y^Cze4>-LL}kUz1HWtt;(=aDR>Y%R;TXnaZ98H%xftUAIARIg@{Bb^hy!e zy06>L(Y-$=1{i%1!uRJKAwPm8lHeZ3c743=9_CWu@Q^n)lsH5lHcF9x7~gyC-o@33 z2ndB4y1Q-WT}%SRK~#4UuJ?0(U4VI5)-^w5=v;j2q5=Y?;_i>1!rk=A?W_^qnC 
z?ke{dBs3oW@ad{+RA+}lL?x;(bk8A!#Uk1FqP|;o_|23{gXA5A?BYrW7RTGS+R}94`g0OQL&K{t{J7uD9Wjz6!u;NA-(TmPN+=Rczo@Vot~2b10;E9sy$JPbqrlgytO@v(Ab@^HZL#n^RvRkLt+29 zUFZ7o*QH5dIpZyhB6b}AD=6CE?c^M`Q39b*qf3FX6 zbO#P;^8B?SbWz>i6<{C@Rw?pUAJq5%>J&~K5d|Kl)(ExrTQ@dD92VR`XN?S>yH~(m zB!!Nb*DZ|fxZ&V%;eD^5K!G7TDbn%x^}bgaokLVA7U;COSG)gv9|;B!v43A&_n^>l z5aWx>@7}I{TzG(B9>0%KpTB302o;Iq{QcpxtG;~-mj;OUd#-nz)!iuo&fAzW=SUAmRa`%TA#x?=!!|z=5)m`TM!`2p2jW6i{}R*WF(~eg2qWAR0c7 ztM3XD9x(5uNQ3aByI%*}xk4lX!5>`M>+Vn^ggjg#`&jqgx(Jyg1p*OSe(pKc$G`0d z5d{Qy-RiIUaNRTI!J*MCMOwSvWq%O%cf?c>Ed1{Ism5K%&|+}#&`^rJa(g@Leq5eF zCJOscV8Cr;=<53W?JIzg9?(T)=5?*`!GeZ_3bPb(UdRwS!^a;L8XS$+QY6CY9i#fb zGCLwt$Buu}Kukb|(W84D_#K9{?lvN?KZuIDU zafmQHlp<~2TV=R!_rYgSkuGr+Lih|Ljz=T9$1lF<`gI`=ED-S9MRtx2TQ3R=Qag1A z-MPN!Sc#Aro_qVQqtnQ6BM}i2Ln-17{oA8EjxZ`fFq4jtwI8oPbwDtaT`BU`qwC%n zT|cZeOdcLe5$O6o-H!h5D`|kZFq9(CFhbYR-xs>c#IfK}iu`NehWwUw;X*?L0-qFV z@kQs{yFtML!lAqPp6{-%CGkk9ub76QpCZ?&#Ce=|6(u;2JxkcM#s** zTi3X60Rh6myP5r)ceMc_!k`^L_xtNFq#!6b*kjxXy)kKEcu|Tte+*yWLe_^7m|vuh+g3f!d#zx!)^_u`9QL5NiJNs;y*H6B z@Ztaw*=BC1`Yt|)NG6t6cKh$J-NR5I8r({e78h>sI)2V^1L8p8;Vyf#yL%78@ql