From a4d3e0c747ebc82ce7b5a706883de5976d8e8c62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Mon, 29 Jul 2024 12:05:18 +0200 Subject: [PATCH 01/37] Adopt list_streaming in tenant deletion (#8504) Uses the Stream based `list_streaming` function added by #8457 in tenant deletion, as suggested in https://github.com/neondatabase/neon/pull/7932#issuecomment-2150480180 . We don't have to worry about retries, as the function is wrapped inside an outer retry block. If there is a retryable error either during the listing or during deletion, we just do a fresh start. Also adds `+ Send` bounds as they are required by the `delete_tenant_remote` function. --- libs/remote_storage/src/lib.rs | 6 +-- libs/remote_storage/src/simulate_failures.rs | 2 +- pageserver/src/tenant/mgr.rs | 52 ++++++++++---------- 3 files changed, 29 insertions(+), 31 deletions(-) diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 75aa28233b..031548bbec 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -194,7 +194,7 @@ pub trait RemoteStorage: Send + Sync + 'static { mode: ListingMode, max_keys: Option, cancel: &CancellationToken, - ) -> impl Stream>; + ) -> impl Stream> + Send; async fn list( &self, @@ -351,10 +351,10 @@ impl GenericRemoteStorage> { mode: ListingMode, max_keys: Option, cancel: &'a CancellationToken, - ) -> impl Stream> + 'a { + ) -> impl Stream> + 'a + Send { match self { Self::LocalFs(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)) - as Pin>>>, + as Pin> + Send>>, Self::AwsS3(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)), Self::AzureBlob(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)), Self::Unreliable(s) => Box::pin(s.list_streaming(prefix, mode, max_keys, cancel)), diff --git a/libs/remote_storage/src/simulate_failures.rs b/libs/remote_storage/src/simulate_failures.rs index 67e5be2955..13f873dcdb 100644 --- a/libs/remote_storage/src/simulate_failures.rs +++ b/libs/remote_storage/src/simulate_failures.rs @@ -114,7 +114,7 @@ impl RemoteStorage for UnreliableWrapper { mode: ListingMode, max_keys: Option, cancel: &CancellationToken, - ) -> impl Stream> { + ) -> impl Stream> + Send { async_stream::stream! { self.attempt(RemoteOp::ListPrefixes(prefix.cloned())) .map_err(DownloadError::Other)?; diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 75c8682c97..5e1f69f4c1 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -1384,34 +1384,32 @@ impl TenantManager { tenant_shard_id: TenantShardId, ) -> Result<(), DeleteTenantError> { let remote_path = remote_tenant_path(&tenant_shard_id); - let keys = match self - .resources - .remote_storage - .list( - Some(&remote_path), - remote_storage::ListingMode::NoDelimiter, - None, - &self.cancel, - ) - .await - { - Ok(listing) => listing.keys, - Err(remote_storage::DownloadError::Cancelled) => { - return Err(DeleteTenantError::Cancelled) - } - Err(remote_storage::DownloadError::NotFound) => return Ok(()), - Err(other) => return Err(DeleteTenantError::Other(anyhow::anyhow!(other))), - }; + let mut keys_stream = self.resources.remote_storage.list_streaming( + Some(&remote_path), + remote_storage::ListingMode::NoDelimiter, + None, + &self.cancel, + ); + while let Some(chunk) = keys_stream.next().await { + let keys = match chunk { + Ok(listing) => listing.keys, + Err(remote_storage::DownloadError::Cancelled) => { + return Err(DeleteTenantError::Cancelled) + } + Err(remote_storage::DownloadError::NotFound) => return Ok(()), + Err(other) => return Err(DeleteTenantError::Other(anyhow::anyhow!(other))), + }; - if keys.is_empty() { - tracing::info!("Remote storage already deleted"); - } else { - tracing::info!("Deleting {} keys from remote storage", keys.len()); - let keys = keys.into_iter().map(|o| o.key).collect::>(); - self.resources - .remote_storage - .delete_objects(&keys, &self.cancel) - .await?; + if keys.is_empty() { + tracing::info!("Remote storage already deleted"); + } else { + tracing::info!("Deleting {} keys from remote storage", keys.len()); + let keys = keys.into_iter().map(|o| o.key).collect::>(); + self.resources + .remote_storage + .delete_objects(&keys, &self.cancel) + .await?; + } } Ok(()) From d8205248e215c170fad1a3141330cbf5563fcf55 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Mon, 29 Jul 2024 14:35:12 +0200 Subject: [PATCH 02/37] Add a test for clickhouse as a logical replication consumer (#8408) ## Problem We need to test logical replication with 3rd-party tools regularly. ## Summary of changes Added a test using ClickHouse as a client Co-authored-by: Alexander Bayandin --- .github/workflows/pg-clients.yml | 72 ++++++++++ poetry.lock | 153 +++++++++++++++++++++- pyproject.toml | 1 + test_runner/logical_repl/test_log_repl.py | 88 +++++++++++++ 4 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 test_runner/logical_repl/test_log_repl.py diff --git a/.github/workflows/pg-clients.yml b/.github/workflows/pg-clients.yml index e21e45c929..55b68ccdb5 100644 --- a/.github/workflows/pg-clients.yml +++ b/.github/workflows/pg-clients.yml @@ -13,6 +13,7 @@ on: paths: - '.github/workflows/pg-clients.yml' - 'test_runner/pg_clients/**' + - 'test_runner/logical_repl/**' - 'poetry.lock' workflow_dispatch: @@ -49,6 +50,77 @@ jobs: image-tag: ${{ needs.check-build-tools-image.outputs.image-tag }} secrets: inherit + test-logical-replication: + needs: [ build-build-tools-image ] + runs-on: ubuntu-22.04 + + container: + image: ${{ needs.build-build-tools-image.outputs.image }} + credentials: + username: ${{ secrets.NEON_DOCKERHUB_USERNAME }} + password: ${{ secrets.NEON_DOCKERHUB_PASSWORD }} + options: --init --user root + services: + clickhouse: + image: clickhouse/clickhouse-server:24.6.3.64 + ports: + - 9000:9000 + - 8123:8123 + + steps: + - uses: actions/checkout@v4 + + - name: Download Neon artifact + uses: ./.github/actions/download + with: + name: neon-${{ runner.os }}-${{ runner.arch }}-release-artifact + path: /tmp/neon/ + prefix: latest + + - name: Create Neon Project + id: create-neon-project + uses: ./.github/actions/neon-project-create + with: + api_key: ${{ secrets.NEON_STAGING_API_KEY }} + postgres_version: ${{ env.DEFAULT_PG_VERSION }} + + - name: Run tests + uses: ./.github/actions/run-python-test-set + with: + build_type: remote + test_selection: logical_repl + run_in_parallel: false + extra_params: -m remote_cluster + pg_version: ${{ env.DEFAULT_PG_VERSION }} + env: + BENCHMARK_CONNSTR: ${{ steps.create-neon-project.outputs.dsn }} + + - name: Delete Neon Project + if: always() + uses: ./.github/actions/neon-project-delete + with: + project_id: ${{ steps.create-neon-project.outputs.project_id }} + api_key: ${{ secrets.NEON_STAGING_API_KEY }} + + - name: Create Allure report + if: ${{ !cancelled() }} + id: create-allure-report + uses: ./.github/actions/allure-report-generate + with: + store-test-results-into-db: true + env: + REGRESS_TEST_RESULT_CONNSTR_NEW: ${{ secrets.REGRESS_TEST_RESULT_CONNSTR_NEW }} + + - name: Post to a Slack channel + if: github.event.schedule && failure() + uses: slackapi/slack-github-action@v1 + with: + channel-id: "C06KHQVQ7U3" # on-call-qa-staging-stream + slack-message: | + Testing the logical replication: <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|${{ job.status }}> (<${{ steps.create-allure-report.outputs.report-url }}|test report>) + env: + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} + test-postgres-client-libs: needs: [ build-build-tools-image ] runs-on: ubuntu-22.04 diff --git a/poetry.lock b/poetry.lock index 5192a574cc..d7a3dde65b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -870,6 +870,96 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} +[[package]] +name = "clickhouse-connect" +version = "0.7.17" +description = "ClickHouse Database Core Driver for Python, Pandas, and Superset" +optional = false +python-versions = "~=3.8" +files = [ + {file = "clickhouse-connect-0.7.17.tar.gz", hash = "sha256:854f1f9f3e024e7f89ae5d57cd3289d7a4c3dc91a9f24c4d233014f0ea19cb2d"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:aca36f5f28be1ada2981fce87724bbf451f267c918015baec59e527de3c9c882"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:66209e4634f457604c263bea176336079d26c284e251e68a8435b0b80c1a25ff"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4d86c5a561a2a99321c8b4af22257461b8e67142f34cfea6e70f39b45b1f406"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d200c9afa2725a96f9f3718221f641276b80c11bf504d8a2fbaafb5a05b2f0d3"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:004d867b1005445a46e6742db1054bf2a717a451372663b46e09b5e9e90a31e3"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:4ef94a4a8e008882259151833c3c47cfbb9c8f08de0f100aaf3b95c366dcfb24"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:ee732c3df50c8b07d16b5836ff85e6b84569922455c03837c3add5cf1388fe1f"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d9dbe1235465bb946e24b90b0ca5b8800b5d645acb2d7d6ee819448c3e2fd959"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-win32.whl", hash = "sha256:e5db0d68dfb63db0297d44dc91406bcfd7d333708d7cd55086c8550fbf870b78"}, + {file = "clickhouse_connect-0.7.17-cp310-cp310-win_amd64.whl", hash = "sha256:800750f568c097ea312887785025006d6098bffd8ed2dd6a57048fb3ced6d778"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4eb390623b3d15dc9cda78f5c68f83ef9ad11743797e70af8fabc384b015a73c"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:35f172ca950f218f63072024c81d5b4ff6e5399620c255506c321ccc7b17c9a5"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae7918f060f7576fc931c692e0122b1b07576fabd81444af22e1f8582300d200"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff2881b93c7a1afb9c99fb59ad5fd666850421325d0931e2b77f3f4ba872303d"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8a4d9b4f97271addf66aadbaf7f154f19a0ad6c22026d575a995c55ebd8576db"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e431469b1ff2d5c3e4c406d55c6afdf7102f5d2524c2ceb5481b94ac24412aa3"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:2b6f80115176559f181a6b3ecad11aa3d70ef6014c3d2905b90fcef3f27d25c2"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d8ac694f40dfafc8a3cc877116b4bc73e8877ebf66d4d96ee092484ee4c0b481"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-win32.whl", hash = "sha256:78b7a3f6b0fad4eaf8afb5f9a2e855bde53e82ea5804960e9cf779538f4606a1"}, + {file = "clickhouse_connect-0.7.17-cp311-cp311-win_amd64.whl", hash = "sha256:efd390cc045334ecc3f2a9c18cc07c041d0288b145967805fdcab65abeefa75f"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9228334a17dc0a7842222f54ba5b89fc563532424aad4f66be799df70ab37e9f"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e432a42bb788bda77e88eda2774392a60fbbb5ee2a79cb2881d182d26c45fe49"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c85152ed2879965ee1fa2bd5e31fb27d281fd5f50d6e86a401efd95cd85b29ef"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:29a126104aa5e11df570cbd89fca4988784084602ba77d17b2396b334c54fd75"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:882d8f9570549258e6eb6a97915fbf64ed29fe395d5e360866ea8d42c8283a35"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:06ebf99111171442f462fb8b357364c3e276da3e8f8557b2e8fee9eb55ab37d1"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e0cf6f99b2777b0d164bf8b65ec39104cdc0789a56bcb52d98289bbd6f5cc70e"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:ee46c508fddfff3b7ac52326788e0c6dd8dfb416b6d7e02e5d30e8110749dac2"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-win32.whl", hash = "sha256:eb708b590a37d56b069a6088254ffa55d73b8cb65527339df81ef03fe67ffdec"}, + {file = "clickhouse_connect-0.7.17-cp312-cp312-win_amd64.whl", hash = "sha256:17f00dccddaeaf43733faa1fa21f7d24641454a73669fda862545ba7c88627f5"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ab5d4b37a6dcc39e94c63beac0f22d9dda914f5eb865d166c64cf04dfadb7d16"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32aa90387f45f34cbc5a984789ed4c12760a3c0056c190ab0123ceafc36b1002"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:21277b6bdd6c8ff14170bfcd52125c5c39f442ec4bafbb643ad7d0ca915f0029"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca68d8b7dee3fb4e7229e06152f5b0faaccafb4c87d9c2d48fa5bd117a3cc1c0"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:841c56282102b2fba1e0b332bb1c7a0c50992fbc321746af8d3e0e6ca2450e8b"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8d7ffde5a4b95d8fe9ed38e08e504e497310e3d7a17691bd40bf65734648fdfc"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:055960086b6b92b6e44f5ba04c81c40c10b038588e4b3908b033c99f66125332"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:36491fec63ceb8503b6344c23477647030139f346b749dc5ee672c505939dbbe"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-win32.whl", hash = "sha256:8779a907e026db32e6bc0bc0c8d5de0e2e3afd166afc2d4adcc0603399af5539"}, + {file = "clickhouse_connect-0.7.17-cp38-cp38-win_amd64.whl", hash = "sha256:309854fa197885c6278438ddd032ab52e6fec56f162074e343c3635ca7266078"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e8009f94550178dc971aeb4f8787ba7a5b473c22647490428b7229f540a51d2b"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:70f8422f407b13a404b3670fd097855abd5adaf890c710d6678d2b46ab61ac48"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:082783eb1e8baf7b3465dd045132dc5cb5a91432c899dc4e19891c5f782d8d23"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c1c30aad2a9c7584c4ee19e646a087b3bbd2d4daab3d88a2afeeae1a7f6febf9"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc8e245a9f4f0dce39f155e626405f60f1d3cf4d1e52dd2c793ea6b603ca111b"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:802372cb8a69c9ffdf4260e9f01616c8601ba531825ed6f08834827e0b880cd1"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:193a60271a3b105cdbde96fb20b40eab8a50fca3bb1f397546f7a18b53d9aa9c"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:59d58932916792fdbd09cb961a245a0c2d87b07b8296f9138915b998f4522941"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-win32.whl", hash = "sha256:3cfd0edabb589f640636a97ffc38d1b3d760faef208d44e50829cc1ad3f0d3e5"}, + {file = "clickhouse_connect-0.7.17-cp39-cp39-win_amd64.whl", hash = "sha256:5661b4629aac228481219abf2e149119af1a71d897f191665e182d9d192d7033"}, + {file = "clickhouse_connect-0.7.17-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:7429d309109e7e4a70fd867d69fcfea9ddcb1a1e910caa6b0e2c3776b71f4613"}, + {file = "clickhouse_connect-0.7.17-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5ae619151006da84a0b1585a9bcc81be32459d8061aeb2e116bad5bbaa7d108"}, + {file = "clickhouse_connect-0.7.17-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec0c84a0880621cb2389656a89886ef3133f0b3f8dc016eee6f25bbb49ff6f70"}, + {file = "clickhouse_connect-0.7.17-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705464c23f821666b76f8f619cf2870225156276562756b3933aaa24708e0ff8"}, + {file = "clickhouse_connect-0.7.17-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1822016f4b769e89264fe26cefe0bc5e50e4c3ca0747d89bb52d57dc4f1e5ffb"}, + {file = "clickhouse_connect-0.7.17-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:6c92b0c342c1fbfa666010e8175e05026dc570a7ef91d8fa81ce503180f318aa"}, + {file = "clickhouse_connect-0.7.17-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2e106536540e906c3c866f8615fcf870a9a77c1bfab9ef4b042febfd2fdb953"}, + {file = "clickhouse_connect-0.7.17-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bac9a32e62384b4341ba51a451084eb3b00c6e59aaac1499145dd8b897cb585c"}, + {file = "clickhouse_connect-0.7.17-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0feed93b9912b7862a8c41be1febcd44b68a824a5c1059b19d5c567afdaa6273"}, + {file = "clickhouse_connect-0.7.17-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:2e2dd6db52e799f065fd565143fde5a872cfe903de1bee7775bc3a349856a790"}, + {file = "clickhouse_connect-0.7.17-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ed13add5d579a5960155f3000420544368501c9703d2fb94f103b4a6126081f6"}, + {file = "clickhouse_connect-0.7.17-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c257a23ed3bf1858593fb03927d9d073fbbdfa24dc2afee537c3314bd66b4e24"}, + {file = "clickhouse_connect-0.7.17-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d47866f64cbdc2d5cc4f8a7a8c49e3ee90c9e487091b9eda7c3a3576418e1cbe"}, + {file = "clickhouse_connect-0.7.17-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9b850e2f17e0a0b5a37d996d3fb728050227489d64d271d678d166abea94f26e"}, + {file = "clickhouse_connect-0.7.17-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:349682288987dc84ac7695f7cd6b510be8d0ec0eee7c1b72dbf2146b4e9efdb8"}, +] + +[package.dependencies] +certifi = "*" +lz4 = "*" +pytz = "*" +urllib3 = ">=1.26" +zstandard = "*" + +[package.extras] +arrow = ["pyarrow"] +numpy = ["numpy"] +orjson = ["orjson"] +pandas = ["pandas"] +sqlalchemy = ["sqlalchemy (>1.3.21,<2.0)"] +tzlocal = ["tzlocal (>=4.0)"] + [[package]] name = "colorama" version = "0.4.5" @@ -1470,6 +1560,56 @@ files = [ {file = "lazy_object_proxy-1.10.0-pp310.pp311.pp312.pp38.pp39-none-any.whl", hash = "sha256:80fa48bd89c8f2f456fc0765c11c23bf5af827febacd2f523ca5bc1893fcc09d"}, ] +[[package]] +name = "lz4" +version = "4.3.3" +description = "LZ4 Bindings for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "lz4-4.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b891880c187e96339474af2a3b2bfb11a8e4732ff5034be919aa9029484cd201"}, + {file = "lz4-4.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:222a7e35137d7539c9c33bb53fcbb26510c5748779364014235afc62b0ec797f"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f76176492ff082657ada0d0f10c794b6da5800249ef1692b35cf49b1e93e8ef7"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d18718f9d78182c6b60f568c9a9cec8a7204d7cb6fad4e511a2ef279e4cb05"}, + {file = "lz4-4.3.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6cdc60e21ec70266947a48839b437d46025076eb4b12c76bd47f8e5eb8a75dcc"}, + {file = "lz4-4.3.3-cp310-cp310-win32.whl", hash = "sha256:c81703b12475da73a5d66618856d04b1307e43428a7e59d98cfe5a5d608a74c6"}, + {file = "lz4-4.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:43cf03059c0f941b772c8aeb42a0813d68d7081c009542301637e5782f8a33e2"}, + {file = "lz4-4.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:30e8c20b8857adef7be045c65f47ab1e2c4fabba86a9fa9a997d7674a31ea6b6"}, + {file = "lz4-4.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2f7b1839f795315e480fb87d9bc60b186a98e3e5d17203c6e757611ef7dcef61"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edfd858985c23523f4e5a7526ca6ee65ff930207a7ec8a8f57a01eae506aaee7"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0e9c410b11a31dbdc94c05ac3c480cb4b222460faf9231f12538d0074e56c563"}, + {file = "lz4-4.3.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d2507ee9c99dbddd191c86f0e0c8b724c76d26b0602db9ea23232304382e1f21"}, + {file = "lz4-4.3.3-cp311-cp311-win32.whl", hash = "sha256:f180904f33bdd1e92967923a43c22899e303906d19b2cf8bb547db6653ea6e7d"}, + {file = "lz4-4.3.3-cp311-cp311-win_amd64.whl", hash = "sha256:b14d948e6dce389f9a7afc666d60dd1e35fa2138a8ec5306d30cd2e30d36b40c"}, + {file = "lz4-4.3.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e36cd7b9d4d920d3bfc2369840da506fa68258f7bb176b8743189793c055e43d"}, + {file = "lz4-4.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:31ea4be9d0059c00b2572d700bf2c1bc82f241f2c3282034a759c9a4d6ca4dc2"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:33c9a6fd20767ccaf70649982f8f3eeb0884035c150c0b818ea660152cf3c809"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bca8fccc15e3add173da91be8f34121578dc777711ffd98d399be35487c934bf"}, + {file = "lz4-4.3.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e7d84b479ddf39fe3ea05387f10b779155fc0990125f4fb35d636114e1c63a2e"}, + {file = "lz4-4.3.3-cp312-cp312-win32.whl", hash = "sha256:337cb94488a1b060ef1685187d6ad4ba8bc61d26d631d7ba909ee984ea736be1"}, + {file = "lz4-4.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:5d35533bf2cee56f38ced91f766cd0038b6abf46f438a80d50c52750088be93f"}, + {file = "lz4-4.3.3-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:363ab65bf31338eb364062a15f302fc0fab0a49426051429866d71c793c23394"}, + {file = "lz4-4.3.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0a136e44a16fc98b1abc404fbabf7f1fada2bdab6a7e970974fb81cf55b636d0"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:abc197e4aca8b63f5ae200af03eb95fb4b5055a8f990079b5bdf042f568469dd"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56f4fe9c6327adb97406f27a66420b22ce02d71a5c365c48d6b656b4aaeb7775"}, + {file = "lz4-4.3.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f0e822cd7644995d9ba248cb4b67859701748a93e2ab7fc9bc18c599a52e4604"}, + {file = "lz4-4.3.3-cp38-cp38-win32.whl", hash = "sha256:24b3206de56b7a537eda3a8123c644a2b7bf111f0af53bc14bed90ce5562d1aa"}, + {file = "lz4-4.3.3-cp38-cp38-win_amd64.whl", hash = "sha256:b47839b53956e2737229d70714f1d75f33e8ac26e52c267f0197b3189ca6de24"}, + {file = "lz4-4.3.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:6756212507405f270b66b3ff7f564618de0606395c0fe10a7ae2ffcbbe0b1fba"}, + {file = "lz4-4.3.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ee9ff50557a942d187ec85462bb0960207e7ec5b19b3b48949263993771c6205"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2b901c7784caac9a1ded4555258207d9e9697e746cc8532129f150ffe1f6ba0d"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6d9ec061b9eca86e4dcc003d93334b95d53909afd5a32c6e4f222157b50c071"}, + {file = "lz4-4.3.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f4c7bf687303ca47d69f9f0133274958fd672efaa33fb5bcde467862d6c621f0"}, + {file = "lz4-4.3.3-cp39-cp39-win32.whl", hash = "sha256:054b4631a355606e99a42396f5db4d22046a3397ffc3269a348ec41eaebd69d2"}, + {file = "lz4-4.3.3-cp39-cp39-win_amd64.whl", hash = "sha256:eac9af361e0d98335a02ff12fb56caeb7ea1196cf1a49dbf6f17828a131da807"}, + {file = "lz4-4.3.3.tar.gz", hash = "sha256:01fe674ef2889dbb9899d8a67361e0c4a2c833af5aeb37dd505727cf5d2a131e"}, +] + +[package.extras] +docs = ["sphinx (>=1.6.0)", "sphinx-bootstrap-theme"] +flake8 = ["flake8"] +tests = ["psutil", "pytest (!=3.3.0)", "pytest-cov"] + [[package]] name = "markupsafe" version = "2.1.1" @@ -2361,6 +2501,17 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "pytz" +version = "2024.1" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.1-py2.py3-none-any.whl", hash = "sha256:328171f4e3623139da4983451950b28e95ac706e13f3f2630a879749e7a8b319"}, + {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, +] + [[package]] name = "pywin32" version = "301" @@ -3206,4 +3357,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "16ebd6a46768be7f67dbdb4ee5903b167d94edc9965f29252f038c67e9e907b0" +content-hash = "7cee6a8c30bc7f4bfb0a87c6bad3952dfb4da127fad853d2710a93ac3eab8a00" diff --git a/pyproject.toml b/pyproject.toml index c7f1a07512..0d5782ac7c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,7 @@ zstandard = "^0.21.0" httpx = {extras = ["http2"], version = "^0.26.0"} pytest-repeat = "^0.9.3" websockets = "^12.0" +clickhouse-connect = "^0.7.16" [tool.poetry.group.dev.dependencies] mypy = "==1.3.0" diff --git a/test_runner/logical_repl/test_log_repl.py b/test_runner/logical_repl/test_log_repl.py new file mode 100644 index 0000000000..0a1aecfe2b --- /dev/null +++ b/test_runner/logical_repl/test_log_repl.py @@ -0,0 +1,88 @@ +""" +Test the logical replication in Neon with the different consumers +""" + +import hashlib +import time + +import clickhouse_connect +import psycopg2 +import pytest +from fixtures.log_helper import log +from fixtures.neon_fixtures import RemotePostgres +from fixtures.utils import wait_until + + +def query_clickhouse( + client, + query: str, + digest: str, +) -> None: + """ + Run the query on the client + return answer if successful, raise an exception otherwise + """ + log.debug("Query: %s", query) + res = client.query(query) + log.debug(res.result_rows) + m = hashlib.sha1() + m.update(repr(tuple(res.result_rows)).encode()) + hash_res = m.hexdigest() + log.debug("Hash: %s", hash_res) + if hash_res == digest: + return + raise ValueError("Hash mismatch") + + +@pytest.mark.remote_cluster +def test_clickhouse(remote_pg: RemotePostgres): + """ + Test the logical replication having ClickHouse as a client + """ + conn_options = remote_pg.conn_options() + for _ in range(5): + try: + conn = psycopg2.connect(remote_pg.connstr()) + except psycopg2.OperationalError as perr: + log.debug(perr) + time.sleep(1) + else: + break + raise TimeoutError + cur = conn.cursor() + cur.execute("DROP TABLE IF EXISTS table1") + cur.execute("CREATE TABLE table1 (id integer primary key, column1 varchar(10));") + cur.execute("INSERT INTO table1 (id, column1) VALUES (1, 'abc'), (2, 'def');") + conn.commit() + client = clickhouse_connect.get_client(host="clickhouse") + client.command("SET allow_experimental_database_materialized_postgresql=1") + client.command( + "CREATE DATABASE db1_postgres ENGINE = " + f"MaterializedPostgreSQL('{conn_options['host']}', " + f"'{conn_options['dbname']}', " + f"'{conn_options['user']}', '{conn_options['password']}') " + "SETTINGS materialized_postgresql_tables_list = 'table1';" + ) + wait_until( + 120, + 0.5, + lambda: query_clickhouse( + client, + "select * from db1_postgres.table1 order by 1", + "ee600d8f7cd05bd0b169fa81f44300a9dd10085a", + ), + ) + cur.execute("INSERT INTO table1 (id, column1) VALUES (3, 'ghi'), (4, 'jkl');") + conn.commit() + wait_until( + 120, + 0.5, + lambda: query_clickhouse( + client, + "select * from db1_postgres.table1 order by 1", + "9eba2daaf7e4d7d27ac849525f68b562ab53947d", + ), + ) + log.debug("Sleeping before final checking if Neon is still alive") + time.sleep(3) + cur.execute("SELECT 1") From 7b3f94c1f05178cc01141b5a59d5aec7dd26d9b9 Mon Sep 17 00:00:00 2001 From: Joonas Koivunen Date: Mon, 29 Jul 2024 15:41:06 +0300 Subject: [PATCH 03/37] test: deflake test_duplicate_creation (#8536) By including comparison of `remote_consistent_lsn_visible` we risk flakyness coming from outside of timeline creation. Mask out the `remote_consistent_lsn_visible` for the comparison. Evidence: https://neon-github-public-dev.s3.amazonaws.com/reports/pr-8489/10142336315/index.html#suites/ffbb7f9930a77115316b58ff32b7c719/89ff0270bf58577a --- test_runner/regress/test_branching.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 03d6946c15..190b624a54 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -389,6 +389,11 @@ def test_duplicate_creation(neon_env_builder: NeonEnvBuilder): repeat_result = ps_http.timeline_create( env.pg_version, env.initial_tenant, success_timeline, timeout=60 ) + # remote_consistent_lsn_visible will be published only after we've + # confirmed the generation, which is not part of what we await during + # timeline creation (uploads). mask it out here to avoid flakyness. + del success_result["remote_consistent_lsn_visible"] + del repeat_result["remote_consistent_lsn_visible"] assert repeat_result == success_result finally: env.pageserver.stop(immediate=True) From 7c40266c82435e1a6e7b8f256506be3107349413 Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 29 Jul 2024 15:05:30 +0100 Subject: [PATCH 04/37] pageserver: fix return code from secondary_download_handler (#8508) ## Problem The secondary download HTTP API is meant to return 200 if the download is complete, and 202 if it is still in progress. In #8198 the download implementation was changed to drop out with success early if it over-runs a time budget, which resulted in 200 responses for incomplete downloads. This breaks storcon_cli's "tenant-warmup" command, which uses the OK status to indicate download complete. ## Summary of changes - Only return 200 if we get an Ok() _and_ the progress stats indicate the download is complete. --- pageserver/src/http/routes.rs | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 7935aeb5e9..9222123ad3 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -2129,14 +2129,24 @@ async fn secondary_download_handler( let timeout = wait.unwrap_or(Duration::MAX); - let status = match tokio::time::timeout( + let result = tokio::time::timeout( timeout, state.secondary_controller.download_tenant(tenant_shard_id), ) - .await - { - // Download job ran to completion. - Ok(Ok(())) => StatusCode::OK, + .await; + + let progress = secondary_tenant.progress.lock().unwrap().clone(); + + let status = match result { + Ok(Ok(())) => { + if progress.layers_downloaded >= progress.layers_total { + // Download job ran to completion + StatusCode::OK + } else { + // Download dropped out without errors because it ran out of time budget + StatusCode::ACCEPTED + } + } // Edge case: downloads aren't usually fallible: things like a missing heatmap are considered // okay. We could get an error here in the unlikely edge case that the tenant // was detached between our check above and executing the download job. @@ -2146,8 +2156,6 @@ async fn secondary_download_handler( Err(_) => StatusCode::ACCEPTED, }; - let progress = secondary_tenant.progress.lock().unwrap().clone(); - json_response(status, progress) } From 010203a49e4eb06733ac11241ece9f8ea6d6b8e4 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Mon, 29 Jul 2024 16:49:22 +0200 Subject: [PATCH 05/37] l0_flush: use mode=direct by default => coverage in automated tests (#8534) Testing in staging and pre-prod has been [going well](https://github.com/neondatabase/neon/issues/7418#issuecomment-2255474917). This PR enables mode=direct by default, thereby providing additional coverage in the automated tests: - Rust tests - Integration tests - Nightly pagebench (likely irrelevant because it's read-only) Production deployments continue to use `mode=page-cache` for the time being: https://github.com/neondatabase/aws/pull/1655 refs https://github.com/neondatabase/neon/issues/7418 --- pageserver/src/l0_flush.rs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pageserver/src/l0_flush.rs b/pageserver/src/l0_flush.rs index 7fe8fedc63..8945e5accd 100644 --- a/pageserver/src/l0_flush.rs +++ b/pageserver/src/l0_flush.rs @@ -2,13 +2,23 @@ use std::{num::NonZeroUsize, sync::Arc}; use crate::tenant::ephemeral_file; -#[derive(Default, Debug, PartialEq, Eq, Clone, serde::Deserialize)] +#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize)] #[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)] pub enum L0FlushConfig { - #[default] PageCached, #[serde(rename_all = "snake_case")] - Direct { max_concurrency: NonZeroUsize }, + Direct { + max_concurrency: NonZeroUsize, + }, +} + +impl Default for L0FlushConfig { + fn default() -> Self { + Self::Direct { + // TODO: using num_cpus results in different peak memory usage on different instance types. + max_concurrency: NonZeroUsize::new(usize::max(1, num_cpus::get())).unwrap(), + } + } } #[derive(Clone)] From 3f05758d0937c28b055514ddb18532426225451f Mon Sep 17 00:00:00 2001 From: John Spray Date: Mon, 29 Jul 2024 17:50:44 +0100 Subject: [PATCH 06/37] scrubber: enable cleaning up garbage tenants from known deletion bugs, add object age safety check (#8461) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Problem Old storage buckets can contain a lot of tenants that aren't known to the control plane at all, because they belonged to test jobs that get their control plane state cleaned up shortly after running. In general, it's somewhat unsafe to purge these, as it's hard to distinguish "control plane doesn't know about this, so it's garbage" from "control plane said it didn't know about this, which is a bug in the scrubber, control plane, or API URL configured". However, the most common case is that we see only a small husk of a tenant in S3 from a specific old behavior of the software, for example: - We had a bug where heatmaps weren't deleted on tenant delete - When WAL DR was first deployed, we didn't delete initdb.tar.zst on tenant deletion ## Summary of changes - Add a KnownBug variant for the garbage reason - Include such cases in the "safe" deletion mode (`--mode=deleted`) - Add code that inspects tenants missing in control plane to identify cases of known bugs (this is kind of slow, but should go away once we've cleaned all these up) - Add an additional `-min-age` safety check similar to physical GC, where even if everything indicates objects aren't needed, we won't delete something that has been modified too recently. --------- Co-authored-by: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Co-authored-by: Arpad Müller --- storage_scrubber/src/garbage.rs | 118 ++++++++++++++++++++++++++++++-- storage_scrubber/src/main.rs | 10 ++- 2 files changed, 121 insertions(+), 7 deletions(-) diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 333269ec7e..78ecfc7232 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -5,6 +5,7 @@ use std::{ collections::{HashMap, HashSet}, sync::Arc, + time::Duration, }; use anyhow::Context; @@ -18,7 +19,7 @@ use utils::id::TenantId; use crate::{ cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}, - init_remote, init_remote_generic, + init_remote, init_remote_generic, list_objects_with_retries, metadata_stream::{stream_tenant_timelines, stream_tenants}, BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth, }; @@ -27,6 +28,11 @@ use crate::{ enum GarbageReason { DeletedInConsole, MissingInConsole, + + // The remaining data relates to a known deletion issue, and we're sure that purging this + // will not delete any real data, for example https://github.com/neondatabase/neon/pull/7928 where + // there is nothing in a tenant path apart from a heatmap file. + KnownBug, } #[derive(Serialize, Deserialize, Debug)] @@ -72,6 +78,15 @@ impl GarbageList { } } + /// If an entity has been identified as requiring purge due to a known bug, e.g. + /// a particular type of object left behind after an incomplete deletion. + fn append_buggy(&mut self, entity: GarbageEntity) { + self.items.push(GarbageItem { + entity, + reason: GarbageReason::KnownBug, + }); + } + /// Return true if appended, false if not. False means the result was not garbage. fn maybe_append(&mut self, entity: GarbageEntity, result: Option) -> bool where @@ -219,6 +234,71 @@ async fn find_garbage_inner( assert!(project.tenant == tenant_shard_id.tenant_id); } + // Special case: If it's missing in console, check for known bugs that would enable us to conclusively + // identify it as purge-able anyway + if console_result.is_none() { + let timelines = stream_tenant_timelines(&s3_client, &target, tenant_shard_id) + .await? + .collect::>() + .await; + if timelines.is_empty() { + // No timelines, but a heatmap: the deletion bug where we deleted everything but heatmaps + let tenant_objects = list_objects_with_retries( + &s3_client, + &target.tenant_root(&tenant_shard_id), + None, + ) + .await?; + let object = tenant_objects.contents.as_ref().unwrap().first().unwrap(); + if object.key.as_ref().unwrap().ends_with("heatmap-v1.json") { + tracing::info!("Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)"); + garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); + continue; + } else { + tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key.as_ref().unwrap()); + } + } else { + // A console-unknown tenant with timelines: check if these timelines only contain initdb.tar.zst, from the initial + // rollout of WAL DR in which we never deleted these. + let mut any_non_initdb = false; + + for timeline_r in timelines { + let timeline = timeline_r?; + let timeline_objects = list_objects_with_retries( + &s3_client, + &target.timeline_root(&timeline), + None, + ) + .await?; + if timeline_objects + .common_prefixes + .as_ref() + .map(|v| v.len()) + .unwrap_or(0) + > 0 + { + // Sub-paths? Unexpected + any_non_initdb = true; + } else { + let object = timeline_objects.contents.as_ref().unwrap().first().unwrap(); + if object.key.as_ref().unwrap().ends_with("initdb.tar.zst") { + tracing::info!("Timeline {timeline} contains only initdb.tar.zst"); + } else { + any_non_initdb = true; + } + } + } + + if any_non_initdb { + tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains timelines, one or more of which are more than just initdb"); + } else { + tracing::info!("Tenant {tenant_shard_id}: is missing in console and contains only timelines that only contain initdb"); + garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); + continue; + } + } + } + if garbage.maybe_append(GarbageEntity::Tenant(tenant_shard_id), console_result) { tracing::debug!("Tenant {tenant_shard_id} is garbage"); } else { @@ -349,9 +429,6 @@ pub async fn get_timeline_objects( tracing::debug!("Listing objects in timeline {ttid}"); let timeline_root = super::remote_timeline_path_id(&ttid); - // TODO: apply extra validation based on object modification time. Don't purge - // timelines whose index_part.json has been touched recently. - let list = s3_client .list( Some(&timeline_root), @@ -422,6 +499,7 @@ impl DeletionProgressTracker { pub async fn purge_garbage( input_path: String, mode: PurgeMode, + min_age: Duration, dry_run: bool, ) -> anyhow::Result<()> { let list_bytes = tokio::fs::read(&input_path).await?; @@ -459,6 +537,7 @@ pub async fn purge_garbage( .filter(|i| match (&mode, &i.reason) { (PurgeMode::DeletedAndMissing, _) => true, (PurgeMode::DeletedOnly, GarbageReason::DeletedInConsole) => true, + (PurgeMode::DeletedOnly, GarbageReason::KnownBug) => true, (PurgeMode::DeletedOnly, GarbageReason::MissingInConsole) => false, }); @@ -487,6 +566,37 @@ pub async fn purge_garbage( let mut progress_tracker = DeletionProgressTracker::default(); while let Some(result) = get_objects_results.next().await { let mut object_list = result?; + + // Extra safety check: even if a collection of objects is garbage, check max() of modification + // times before purging, so that if we incorrectly marked a live tenant as garbage then we would + // notice that its index has been written recently and would omit deleting it. + if object_list.is_empty() { + // Simplify subsequent code by ensuring list always has at least one item + // Usually, this only occurs if there is parallel deletions racing us, as there is no empty prefixes + continue; + } + let max_mtime = object_list.iter().map(|o| o.last_modified).max().unwrap(); + let age = max_mtime.elapsed(); + match age { + Err(_) => { + tracing::warn!("Bad last_modified time"); + continue; + } + Ok(a) if a < min_age => { + // Failed age check. This doesn't mean we did something wrong: a tenant might really be garbage and recently + // written, but out of an abundance of caution we still don't purge it. + tracing::info!( + "Skipping tenant with young objects {}..{}", + object_list.first().as_ref().unwrap().key, + object_list.last().as_ref().unwrap().key + ); + continue; + } + Ok(_) => { + // Passed age check + } + } + objects_to_delete.append(&mut object_list); if objects_to_delete.len() >= MAX_KEYS_PER_DELETE { do_delete( diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index b3ed6f6451..346829b7c9 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -50,6 +50,8 @@ enum Command { input_path: String, #[arg(short, long, default_value_t = PurgeMode::DeletedOnly)] mode: PurgeMode, + #[arg(long = "min-age")] + min_age: humantime::Duration, }, #[command(verbatim_doc_comment)] ScanMetadata { @@ -196,9 +198,11 @@ async fn main() -> anyhow::Result<()> { let console_config = ConsoleConfig::from_env()?; find_garbage(bucket_config, console_config, depth, node_kind, output_path).await } - Command::PurgeGarbage { input_path, mode } => { - purge_garbage(input_path, mode, !cli.delete).await - } + Command::PurgeGarbage { + input_path, + mode, + min_age, + } => purge_garbage(input_path, mode, min_age.into(), !cli.delete).await, Command::TenantSnapshot { tenant_id, output_path, From 57f22178d7bbe3f635fc5f0858b942d73f03ad72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 30 Jul 2024 09:59:15 +0200 Subject: [PATCH 07/37] Add metrics for input data considered and taken for compression (#8522) If compression is enabled, we currently try compressing each image larger than a specific size and if the compressed version is smaller, we write that one, otherwise we use the uncompressed image. However, this might sometimes be a wasteful process, if there is a substantial amount of images that don't compress well. The compression metrics added in #8420 `pageserver_compression_image_in_bytes_total` and `pageserver_compression_image_out_bytes_total` are well designed for answering the question how space efficient the total compression process is end-to-end, which helps one to decide whether to enable it or not. To answer the question of how much waste there is in terms of trial compression, so CPU time, we add two metrics: * one about the images that have been trial-compressed (considered), and * one about the images where the compressed image has actually been written (chosen). There is different ways of weighting them, like for example one could look at the count, or the compressed data. But the main contributor to compression CPU usage is amount of data processed, so we weight the images by their *uncompressed* size. In other words, the two metrics are: * `pageserver_compression_image_in_bytes_considered` * `pageserver_compression_image_in_bytes_chosen` Part of #5431 --- pageserver/src/metrics.rs | 18 +++++++++- pageserver/src/tenant/blob_io.rs | 36 +++++++++++++------ .../src/tenant/storage_layer/delta_layer.rs | 2 +- .../src/tenant/storage_layer/image_layer.rs | 26 ++++++++++++-- 4 files changed, 68 insertions(+), 14 deletions(-) diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index 9aff5220f5..ede6b41a75 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -613,7 +613,23 @@ pub(crate) static CIRCUIT_BREAKERS_UNBROKEN: Lazy = Lazy::new(|| { pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES: Lazy = Lazy::new(|| { register_int_counter!( "pageserver_compression_image_in_bytes_total", - "Size of uncompressed data written into image layers" + "Size of data written into image layers before compression" + ) + .expect("failed to define a metric") +}); + +pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED: Lazy = Lazy::new(|| { + register_int_counter!( + "pageserver_compression_image_in_bytes_considered", + "Size of potentially compressible data written into image layers before compression" + ) + .expect("failed to define a metric") +}); + +pub(crate) static COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN: Lazy = Lazy::new(|| { + register_int_counter!( + "pageserver_compression_image_in_bytes_chosen", + "Size of data whose compressed form was written into image layers" ) .expect("failed to define a metric") }); diff --git a/pageserver/src/tenant/blob_io.rs b/pageserver/src/tenant/blob_io.rs index 791eefebe9..8e9d349ca8 100644 --- a/pageserver/src/tenant/blob_io.rs +++ b/pageserver/src/tenant/blob_io.rs @@ -28,6 +28,12 @@ use crate::virtual_file::VirtualFile; use std::cmp::min; use std::io::{Error, ErrorKind}; +#[derive(Copy, Clone, Debug)] +pub struct CompressionInfo { + pub written_compressed: bool, + pub compressed_size: Option, +} + impl<'a> BlockCursor<'a> { /// Read a blob into a new buffer. pub async fn read_blob( @@ -273,8 +279,10 @@ impl BlobWriter { srcbuf: B, ctx: &RequestContext, ) -> (B::Buf, Result) { - self.write_blob_maybe_compressed(srcbuf, ctx, ImageCompressionAlgorithm::Disabled) - .await + let (buf, res) = self + .write_blob_maybe_compressed(srcbuf, ctx, ImageCompressionAlgorithm::Disabled) + .await; + (buf, res.map(|(off, _compression_info)| off)) } /// Write a blob of data. Returns the offset that it was written to, @@ -284,8 +292,12 @@ impl BlobWriter { srcbuf: B, ctx: &RequestContext, algorithm: ImageCompressionAlgorithm, - ) -> (B::Buf, Result) { + ) -> (B::Buf, Result<(u64, CompressionInfo), Error>) { let offset = self.offset; + let mut compression_info = CompressionInfo { + written_compressed: false, + compressed_size: None, + }; let len = srcbuf.bytes_init(); @@ -328,7 +340,9 @@ impl BlobWriter { encoder.write_all(&slice[..]).await.unwrap(); encoder.shutdown().await.unwrap(); let compressed = encoder.into_inner(); + compression_info.compressed_size = Some(compressed.len()); if compressed.len() < len { + compression_info.written_compressed = true; let compressed_len = compressed.len(); compressed_buf = Some(compressed); (BYTE_ZSTD, compressed_len, slice.into_inner()) @@ -359,7 +373,7 @@ impl BlobWriter { } else { self.write_all(srcbuf, ctx).await }; - (srcbuf, res.map(|_| offset)) + (srcbuf, res.map(|_| (offset, compression_info))) } } @@ -416,12 +430,14 @@ pub(crate) mod tests { let mut wtr = BlobWriter::::new(file, 0); for blob in blobs.iter() { let (_, res) = if compression { - wtr.write_blob_maybe_compressed( - blob.clone(), - ctx, - ImageCompressionAlgorithm::Zstd { level: Some(1) }, - ) - .await + let res = wtr + .write_blob_maybe_compressed( + blob.clone(), + ctx, + ImageCompressionAlgorithm::Zstd { level: Some(1) }, + ) + .await; + (res.0, res.1.map(|(off, _)| off)) } else { wtr.write_blob(blob.clone(), ctx).await }; diff --git a/pageserver/src/tenant/storage_layer/delta_layer.rs b/pageserver/src/tenant/storage_layer/delta_layer.rs index 229d1e3608..f9becf53ff 100644 --- a/pageserver/src/tenant/storage_layer/delta_layer.rs +++ b/pageserver/src/tenant/storage_layer/delta_layer.rs @@ -467,7 +467,7 @@ impl DeltaLayerWriterInner { .write_blob_maybe_compressed(val, ctx, compression) .await; let off = match res { - Ok(off) => off, + Ok((off, _)) => off, Err(e) => return (val, Err(anyhow::anyhow!(e))), }; diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 44ba685490..08db27514a 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -734,6 +734,14 @@ struct ImageLayerWriterInner { // Total uncompressed bytes passed into put_image uncompressed_bytes: u64, + // Like `uncompressed_bytes`, + // but only of images we might consider for compression + uncompressed_bytes_eligible: u64, + + // Like `uncompressed_bytes`, but only of images + // where we have chosen their compressed form + uncompressed_bytes_chosen: u64, + blob_writer: BlobWriter, tree: DiskBtreeBuilder, } @@ -790,6 +798,8 @@ impl ImageLayerWriterInner { tree: tree_builder, blob_writer, uncompressed_bytes: 0, + uncompressed_bytes_eligible: 0, + uncompressed_bytes_chosen: 0, }; Ok(writer) @@ -808,13 +818,22 @@ impl ImageLayerWriterInner { ) -> anyhow::Result<()> { ensure!(self.key_range.contains(&key)); let compression = self.conf.image_compression; - self.uncompressed_bytes += img.len() as u64; + let uncompressed_len = img.len() as u64; + self.uncompressed_bytes += uncompressed_len; let (_img, res) = self .blob_writer .write_blob_maybe_compressed(img, ctx, compression) .await; // TODO: re-use the buffer for `img` further upstack - let off = res?; + let (off, compression_info) = res?; + if compression_info.compressed_size.is_some() { + // The image has been considered for compression at least + self.uncompressed_bytes_eligible += uncompressed_len; + } + if compression_info.written_compressed { + // The image has been compressed + self.uncompressed_bytes_chosen += uncompressed_len; + } let mut keybuf: [u8; KEY_SIZE] = [0u8; KEY_SIZE]; key.write_to_byte_slice(&mut keybuf); @@ -837,6 +856,9 @@ impl ImageLayerWriterInner { // Calculate compression ratio let compressed_size = self.blob_writer.size() - PAGE_SZ as u64; // Subtract PAGE_SZ for header crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES.inc_by(self.uncompressed_bytes); + crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CONSIDERED + .inc_by(self.uncompressed_bytes_eligible); + crate::metrics::COMPRESSION_IMAGE_INPUT_BYTES_CHOSEN.inc_by(self.uncompressed_bytes_chosen); crate::metrics::COMPRESSION_IMAGE_OUTPUT_BYTES.inc_by(compressed_size); let mut file = self.blob_writer.into_inner(); From cd3f4b3a5352993488901327432ff05a0e760e5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Tue, 30 Jul 2024 11:00:37 +0200 Subject: [PATCH 08/37] scrubber: add remote_storage based listing APIs and use them in find-large-objects (#8541) Add two new functions `stream_objects_with_retries` and `stream_tenants_generic` and use them in the `find-large-objects` subcommand, migrating it to `remote_storage`. Also adds the `size` field to the `ListingObject` struct. Part of #7547 --- libs/remote_storage/src/azure_blob.rs | 3 +- libs/remote_storage/src/lib.rs | 1 + libs/remote_storage/src/local_fs.rs | 2 + libs/remote_storage/src/s3_bucket.rs | 5 +- storage_scrubber/src/find_large_objects.rs | 44 +++++------ storage_scrubber/src/garbage.rs | 2 +- storage_scrubber/src/lib.rs | 90 +++++++++++++++++----- storage_scrubber/src/metadata_stream.rs | 33 +++++++- 8 files changed, 133 insertions(+), 47 deletions(-) diff --git a/libs/remote_storage/src/azure_blob.rs b/libs/remote_storage/src/azure_blob.rs index 6ca4ae43f2..3c77d5a227 100644 --- a/libs/remote_storage/src/azure_blob.rs +++ b/libs/remote_storage/src/azure_blob.rs @@ -355,7 +355,8 @@ impl RemoteStorage for AzureBlobStorage { .blobs() .map(|k| ListingObject{ key: self.name_to_relative_path(&k.name), - last_modified: k.properties.last_modified.into() + last_modified: k.properties.last_modified.into(), + size: k.properties.content_length, } ); diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 031548bbec..794e696769 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -153,6 +153,7 @@ pub enum ListingMode { pub struct ListingObject { pub key: RemotePath, pub last_modified: SystemTime, + pub size: u64, } #[derive(Default)] diff --git a/libs/remote_storage/src/local_fs.rs b/libs/remote_storage/src/local_fs.rs index bc6b10aa51..99b4aa4061 100644 --- a/libs/remote_storage/src/local_fs.rs +++ b/libs/remote_storage/src/local_fs.rs @@ -368,6 +368,7 @@ impl RemoteStorage for LocalFs { key: k.clone(), // LocalFs is just for testing, so just specify a dummy time last_modified: SystemTime::now(), + size: 0, }) } }) @@ -411,6 +412,7 @@ impl RemoteStorage for LocalFs { key: RemotePath::from_string(&relative_key).unwrap(), // LocalFs is just for testing last_modified: SystemTime::now(), + size: 0, }); } } diff --git a/libs/remote_storage/src/s3_bucket.rs b/libs/remote_storage/src/s3_bucket.rs index 412f307445..1f25da813d 100644 --- a/libs/remote_storage/src/s3_bucket.rs +++ b/libs/remote_storage/src/s3_bucket.rs @@ -565,9 +565,12 @@ impl RemoteStorage for S3Bucket { } }; + let size = object.size.unwrap_or(0) as u64; + result.keys.push(ListingObject{ key, - last_modified + last_modified, + size, }); if let Some(mut mk) = max_keys { assert!(mk > 0); diff --git a/storage_scrubber/src/find_large_objects.rs b/storage_scrubber/src/find_large_objects.rs index 2ef802229d..f5bb7e088a 100644 --- a/storage_scrubber/src/find_large_objects.rs +++ b/storage_scrubber/src/find_large_objects.rs @@ -1,10 +1,13 @@ +use std::pin::pin; + use futures::{StreamExt, TryStreamExt}; use pageserver::tenant::storage_layer::LayerName; +use remote_storage::ListingMode; use serde::{Deserialize, Serialize}; use crate::{ - checks::parse_layer_object_name, init_remote, list_objects_with_retries, - metadata_stream::stream_tenants, BucketConfig, NodeKind, + checks::parse_layer_object_name, init_remote_generic, metadata_stream::stream_tenants_generic, + stream_objects_with_retries, BucketConfig, NodeKind, }; #[derive(Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] @@ -47,45 +50,38 @@ pub async fn find_large_objects( ignore_deltas: bool, concurrency: usize, ) -> anyhow::Result { - let (s3_client, target) = init_remote(bucket_config.clone(), NodeKind::Pageserver).await?; - let tenants = std::pin::pin!(stream_tenants(&s3_client, &target)); + let (remote_client, target) = + init_remote_generic(bucket_config.clone(), NodeKind::Pageserver).await?; + let tenants = pin!(stream_tenants_generic(&remote_client, &target)); let objects_stream = tenants.map_ok(|tenant_shard_id| { let mut tenant_root = target.tenant_root(&tenant_shard_id); - let s3_client = s3_client.clone(); + let remote_client = remote_client.clone(); async move { let mut objects = Vec::new(); let mut total_objects_ctr = 0u64; // We want the objects and not just common prefixes tenant_root.delimiter.clear(); - let mut continuation_token = None; - loop { - let fetch_response = - list_objects_with_retries(&s3_client, &tenant_root, continuation_token.clone()) - .await?; - for obj in fetch_response.contents().iter().filter(|o| { - if let Some(obj_size) = o.size { - min_size as i64 <= obj_size - } else { - false - } - }) { - let key = obj.key().expect("couldn't get key").to_owned(); + let mut objects_stream = pin!(stream_objects_with_retries( + &remote_client, + ListingMode::NoDelimiter, + &tenant_root + )); + while let Some(listing) = objects_stream.next().await { + let listing = listing?; + for obj in listing.keys.iter().filter(|obj| min_size <= obj.size) { + let key = obj.key.to_string(); let kind = LargeObjectKind::from_key(&key); if ignore_deltas && kind == LargeObjectKind::DeltaLayer { continue; } objects.push(LargeObject { key, - size: obj.size.unwrap() as u64, + size: obj.size, kind, }) } - total_objects_ctr += fetch_response.contents().len() as u64; - match fetch_response.next_continuation_token { - Some(new_token) => continuation_token = Some(new_token), - None => break, - } + total_objects_ctr += listing.keys.len() as u64; } Ok((tenant_shard_id, objects, total_objects_ctr)) diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 78ecfc7232..73479c3658 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -510,7 +510,7 @@ pub async fn purge_garbage( input_path ); - let remote_client = + let (remote_client, _target) = init_remote_generic(garbage_list.bucket_config.clone(), garbage_list.node_kind).await?; assert_eq!( diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 5c64e7e459..c7900f9b02 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -22,16 +22,18 @@ use aws_sdk_s3::Client; use camino::{Utf8Path, Utf8PathBuf}; use clap::ValueEnum; +use futures::{Stream, StreamExt}; use pageserver::tenant::remote_timeline_client::{remote_tenant_path, remote_timeline_path}; use pageserver::tenant::TENANTS_SEGMENT_NAME; use pageserver_api::shard::TenantShardId; use remote_storage::{ - GenericRemoteStorage, RemotePath, RemoteStorageConfig, RemoteStorageKind, S3Config, - DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, + GenericRemoteStorage, Listing, ListingMode, RemotePath, RemoteStorageConfig, RemoteStorageKind, + S3Config, DEFAULT_MAX_KEYS_PER_LIST_RESPONSE, DEFAULT_REMOTE_STORAGE_S3_CONCURRENCY_LIMIT, }; use reqwest::Url; use serde::{Deserialize, Serialize}; use tokio::io::AsyncReadExt; +use tokio_util::sync::CancellationToken; use tracing::error; use tracing_appender::non_blocking::WorkerGuard; use tracing_subscriber::{fmt, prelude::*, EnvFilter}; @@ -319,27 +321,35 @@ fn default_prefix_in_bucket(node_kind: NodeKind) -> &'static str { } } +fn make_root_target( + bucket_name: String, + prefix_in_bucket: String, + node_kind: NodeKind, +) -> RootTarget { + let s3_target = S3Target { + bucket_name, + prefix_in_bucket, + delimiter: "/".to_string(), + }; + match node_kind { + NodeKind::Pageserver => RootTarget::Pageserver(s3_target), + NodeKind::Safekeeper => RootTarget::Safekeeper(s3_target), + } +} + async fn init_remote( bucket_config: BucketConfig, node_kind: NodeKind, ) -> anyhow::Result<(Arc, RootTarget)> { let bucket_region = Region::new(bucket_config.region); - let delimiter = "/".to_string(); let s3_client = Arc::new(init_s3_client(bucket_region).await); let default_prefix = default_prefix_in_bucket(node_kind).to_string(); - let s3_root = match node_kind { - NodeKind::Pageserver => RootTarget::Pageserver(S3Target { - bucket_name: bucket_config.bucket, - prefix_in_bucket: bucket_config.prefix_in_bucket.unwrap_or(default_prefix), - delimiter, - }), - NodeKind::Safekeeper => RootTarget::Safekeeper(S3Target { - bucket_name: bucket_config.bucket, - prefix_in_bucket: bucket_config.prefix_in_bucket.unwrap_or(default_prefix), - delimiter, - }), - }; + let s3_root = make_root_target( + bucket_config.bucket, + bucket_config.prefix_in_bucket.unwrap_or(default_prefix), + node_kind, + ); Ok((s3_client, s3_root)) } @@ -347,12 +357,12 @@ async fn init_remote( async fn init_remote_generic( bucket_config: BucketConfig, node_kind: NodeKind, -) -> anyhow::Result { +) -> anyhow::Result<(GenericRemoteStorage, RootTarget)> { let endpoint = env::var("AWS_ENDPOINT_URL").ok(); let default_prefix = default_prefix_in_bucket(node_kind).to_string(); let prefix_in_bucket = Some(bucket_config.prefix_in_bucket.unwrap_or(default_prefix)); let storage = S3Config { - bucket_name: bucket_config.bucket, + bucket_name: bucket_config.bucket.clone(), bucket_region: bucket_config.region, prefix_in_bucket, endpoint, @@ -366,7 +376,13 @@ async fn init_remote_generic( storage: RemoteStorageKind::AwsS3(storage), timeout: RemoteStorageConfig::DEFAULT_TIMEOUT, }; - GenericRemoteStorage::from_config(&storage_config).await + + // We already pass the prefix to the remote client above + let prefix_in_root_target = String::new(); + let s3_root = make_root_target(bucket_config.bucket, prefix_in_root_target, node_kind); + + let client = GenericRemoteStorage::from_config(&storage_config).await?; + Ok((client, s3_root)) } async fn list_objects_with_retries( @@ -404,6 +420,44 @@ async fn list_objects_with_retries( Err(anyhow!("unreachable unless MAX_RETRIES==0")) } +fn stream_objects_with_retries<'a>( + storage_client: &'a GenericRemoteStorage, + listing_mode: ListingMode, + s3_target: &'a S3Target, +) -> impl Stream> + 'a { + async_stream::stream! { + let mut trial = 0; + let cancel = CancellationToken::new(); + let prefix_str = &s3_target + .prefix_in_bucket + .strip_prefix("/") + .unwrap_or(&s3_target.prefix_in_bucket); + let prefix = RemotePath::from_string(prefix_str)?; + let mut list_stream = + storage_client.list_streaming(Some(&prefix), listing_mode, None, &cancel); + while let Some(res) = list_stream.next().await { + if let Err(err) = res { + let yield_err = if err.is_permanent() { + true + } else { + let backoff_time = 1 << trial.max(5); + tokio::time::sleep(Duration::from_secs(backoff_time)).await; + trial += 1; + trial == MAX_RETRIES - 1 + }; + if yield_err { + yield Err(err) + .with_context(|| format!("Failed to list objects {MAX_RETRIES} times")); + break; + } + } else { + trial = 0; + yield res.map_err(anyhow::Error::from); + } + } + } +} + async fn download_object_with_retries( s3_client: &Client, bucket_name: &str, diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index c05874f556..91dba3c992 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -1,12 +1,41 @@ -use anyhow::Context; +use std::str::FromStr; + +use anyhow::{anyhow, Context}; use async_stream::{stream, try_stream}; use aws_sdk_s3::{types::ObjectIdentifier, Client}; +use futures::StreamExt; +use remote_storage::{GenericRemoteStorage, ListingMode}; use tokio_stream::Stream; -use crate::{list_objects_with_retries, RootTarget, S3Target, TenantShardTimelineId}; +use crate::{ + list_objects_with_retries, stream_objects_with_retries, RootTarget, S3Target, + TenantShardTimelineId, +}; use pageserver_api::shard::TenantShardId; use utils::id::{TenantId, TimelineId}; +/// Given a remote storage and a target, output a stream of TenantIds discovered via listing prefixes +pub fn stream_tenants_generic<'a>( + remote_client: &'a GenericRemoteStorage, + target: &'a RootTarget, +) -> impl Stream> + 'a { + try_stream! { + let tenants_target = target.tenants_root(); + let mut tenants_stream = + std::pin::pin!(stream_objects_with_retries(remote_client, ListingMode::WithDelimiter, &tenants_target)); + while let Some(chunk) = tenants_stream.next().await { + let chunk = chunk?; + let entry_ids = chunk.prefixes.iter() + .map(|prefix| prefix.get_path().file_name().ok_or_else(|| anyhow!("no final component in path '{prefix}'"))); + for dir_name_res in entry_ids { + let dir_name = dir_name_res?; + let id = TenantShardId::from_str(dir_name)?; + yield id; + } + } + } +} + /// Given an S3 bucket, output a stream of TenantIds discovered via ListObjectsv2 pub fn stream_tenants<'a>( s3_client: &'a Client, From 5be3e090827755cc052f48e703c028ef721a43b8 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Tue, 30 Jul 2024 13:38:23 +0100 Subject: [PATCH 09/37] CI(benchmarking): make neonvm default provisioner (#8538) ## Problem We don't allow regular end-users to use `k8s-pod` provisioner, but we still use it in nightly benchmarks ## Summary of changes - Remove `provisioner` input from `neon-create-project` action, use `k8s-neonvm` as a default provioner - Change `neon-` platform prefix to `neonvm-` - Remove `neon-captest-freetier` and `neon-captest-new` as we already have their `neonvm` counterparts --- .../actions/neon-project-create/action.yml | 12 +---- .github/workflows/benchmarking.yml | 52 ++++++++----------- 2 files changed, 25 insertions(+), 39 deletions(-) diff --git a/.github/actions/neon-project-create/action.yml b/.github/actions/neon-project-create/action.yml index d4029bd37c..f4a194639f 100644 --- a/.github/actions/neon-project-create/action.yml +++ b/.github/actions/neon-project-create/action.yml @@ -14,11 +14,8 @@ inputs: api_host: description: 'Neon API host' default: console-stage.neon.build - provisioner: - description: 'k8s-pod or k8s-neonvm' - default: 'k8s-pod' compute_units: - description: '[Min, Max] compute units; Min and Max are used for k8s-neonvm with autoscaling, for k8s-pod values Min and Max should be equal' + description: '[Min, Max] compute units' default: '[1, 1]' outputs: @@ -37,10 +34,6 @@ runs: # A shell without `set -x` to not to expose password/dsn in logs shell: bash -euo pipefail {0} run: | - if [ "${PROVISIONER}" == "k8s-pod" ] && [ "${MIN_CU}" != "${MAX_CU}" ]; then - echo >&2 "For k8s-pod provisioner MIN_CU should be equal to MAX_CU" - fi - project=$(curl \ "https://${API_HOST}/api/v2/projects" \ --fail \ @@ -52,7 +45,7 @@ runs: \"name\": \"Created by actions/neon-project-create; GITHUB_RUN_ID=${GITHUB_RUN_ID}\", \"pg_version\": ${POSTGRES_VERSION}, \"region_id\": \"${REGION_ID}\", - \"provisioner\": \"${PROVISIONER}\", + \"provisioner\": \"k8s-neonvm\", \"autoscaling_limit_min_cu\": ${MIN_CU}, \"autoscaling_limit_max_cu\": ${MAX_CU}, \"settings\": { } @@ -75,6 +68,5 @@ runs: API_KEY: ${{ inputs.api_key }} REGION_ID: ${{ inputs.region_id }} POSTGRES_VERSION: ${{ inputs.postgres_version }} - PROVISIONER: ${{ inputs.provisioner }} MIN_CU: ${{ fromJSON(inputs.compute_units)[0] }} MAX_CU: ${{ fromJSON(inputs.compute_units)[1] }} diff --git a/.github/workflows/benchmarking.yml b/.github/workflows/benchmarking.yml index 5ffdb29fe6..f7ea534fb9 100644 --- a/.github/workflows/benchmarking.yml +++ b/.github/workflows/benchmarking.yml @@ -63,11 +63,9 @@ jobs: - DEFAULT_PG_VERSION: 16 PLATFORM: "neon-staging" region_id: ${{ github.event.inputs.region_id || 'aws-us-east-2' }} - provisioner: 'k8s-pod' - DEFAULT_PG_VERSION: 16 PLATFORM: "azure-staging" region_id: 'azure-eastus2' - provisioner: 'k8s-neonvm' env: TEST_PG_BENCH_DURATIONS_MATRIX: "300" TEST_PG_BENCH_SCALES_MATRIX: "10,100" @@ -100,7 +98,6 @@ jobs: region_id: ${{ matrix.region_id }} postgres_version: ${{ env.DEFAULT_PG_VERSION }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} - provisioner: ${{ matrix.provisioner }} - name: Run benchmark uses: ./.github/actions/run-python-test-set @@ -216,11 +213,11 @@ jobs: # Create matrices for the benchmarking jobs, so we run benchmarks on rds only once a week (on Saturday) # # Available platforms: - # - neon-captest-new: Freshly created project (1 CU) - # - neon-captest-freetier: Use freetier-sized compute (0.25 CU) + # - neonvm-captest-new: Freshly created project (1 CU) + # - neonvm-captest-freetier: Use freetier-sized compute (0.25 CU) # - neonvm-captest-azure-new: Freshly created project (1 CU) in azure region # - neonvm-captest-azure-freetier: Use freetier-sized compute (0.25 CU) in azure region - # - neon-captest-reuse: Reusing existing project + # - neonvm-captest-reuse: Reusing existing project # - rds-aurora: Aurora Postgres Serverless v2 with autoscaling from 0.5 to 2 ACUs # - rds-postgres: RDS Postgres db.m5.large instance (2 vCPU, 8 GiB) with gp3 EBS storage env: @@ -245,18 +242,16 @@ jobs: "'"$region_id_default"'" ], "platform": [ - "neon-captest-new", - "neon-captest-reuse", + "neonvm-captest-new", + "neonvm-captest-reuse", "neonvm-captest-new" ], "db_size": [ "10gb" ], - "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neon-captest-freetier", "db_size": "3gb" }, - { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neon-captest-new", "db_size": "50gb" }, - { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" }, + "include": [{ "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-freetier", "db_size": "3gb" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-new", "db_size": "50gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb" }, - { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-freetier", "db_size": "3gb" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "10gb" }, + { "pg_version": 16, "region_id": "azure-eastus2", "platform": "neonvm-azure-captest-new", "db_size": "50gb" }, { "pg_version": 16, "region_id": "'"$region_id_default"'", "platform": "neonvm-captest-sharding-reuse", "db_size": "50gb" }] }' @@ -271,7 +266,7 @@ jobs: run: | matrix='{ "platform": [ - "neon-captest-reuse" + "neonvm-captest-reuse" ] }' @@ -287,7 +282,7 @@ jobs: run: | matrix='{ "platform": [ - "neon-captest-reuse" + "neonvm-captest-reuse" ], "scale": [ "10" @@ -338,7 +333,7 @@ jobs: prefix: latest - name: Create Neon Project - if: contains(fromJson('["neon-captest-new", "neon-captest-freetier", "neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) + if: contains(fromJson('["neonvm-captest-new", "neonvm-captest-freetier", "neonvm-azure-captest-freetier", "neonvm-azure-captest-new"]'), matrix.platform) id: create-neon-project uses: ./.github/actions/neon-project-create with: @@ -346,19 +341,18 @@ jobs: postgres_version: ${{ env.DEFAULT_PG_VERSION }} api_key: ${{ secrets.NEON_STAGING_API_KEY }} compute_units: ${{ (contains(matrix.platform, 'captest-freetier') && '[0.25, 0.25]') || '[1, 1]' }} - provisioner: ${{ (contains(matrix.platform, 'neonvm-') && 'k8s-neonvm') || 'k8s-pod' }} - name: Set up Connection String id: set-up-connstr run: | case "${PLATFORM}" in - neon-captest-reuse) + neonvm-captest-reuse) CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CONNSTR }} ;; neonvm-captest-sharding-reuse) CONNSTR=${{ secrets.BENCHMARK_CAPTEST_SHARDING_CONNSTR }} ;; - neon-captest-new | neon-captest-freetier | neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier) + neonvm-captest-new | neonvm-captest-freetier | neonvm-azure-captest-new | neonvm-azure-captest-freetier) CONNSTR=${{ steps.create-neon-project.outputs.dsn }} ;; rds-aurora) @@ -442,9 +436,9 @@ jobs: fail-fast: false matrix: include: - - PLATFORM: "neon-captest-pgvector" + - PLATFORM: "neonvm-captest-pgvector" - PLATFORM: "azure-captest-pgvector" - + env: TEST_PG_BENCH_DURATIONS_MATRIX: "15m" TEST_PG_BENCH_SCALES_MATRIX: "1" @@ -486,7 +480,7 @@ jobs: id: set-up-connstr run: | case "${PLATFORM}" in - neon-captest-pgvector) + neonvm-captest-pgvector) CONNSTR=${{ secrets.BENCHMARK_PGVECTOR_CONNSTR }} ;; azure-captest-pgvector) @@ -585,7 +579,7 @@ jobs: id: set-up-connstr run: | case "${PLATFORM}" in - neon-captest-reuse) + neonvm-captest-reuse) CONNSTR=${{ secrets.BENCHMARK_CAPTEST_CLICKBENCH_10M_CONNSTR }} ;; rds-aurora) @@ -595,7 +589,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_RDS_POSTGRES_CLICKBENCH_10M_CONNSTR }} ;; *) - echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -672,7 +666,7 @@ jobs: - name: Get Connstring Secret Name run: | case "${PLATFORM}" in - neon-captest-reuse) + neonvm-captest-reuse) ENV_PLATFORM=CAPTEST_TPCH ;; rds-aurora) @@ -682,7 +676,7 @@ jobs: ENV_PLATFORM=RDS_AURORA_TPCH ;; *) - echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac @@ -759,7 +753,7 @@ jobs: id: set-up-connstr run: | case "${PLATFORM}" in - neon-captest-reuse) + neonvm-captest-reuse) CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_CAPTEST_CONNSTR }} ;; rds-aurora) @@ -769,7 +763,7 @@ jobs: CONNSTR=${{ secrets.BENCHMARK_USER_EXAMPLE_RDS_POSTGRES_CONNSTR }} ;; *) - echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neon-captest-reuse', 'rds-aurora', or 'rds-postgres'" + echo >&2 "Unknown PLATFORM=${PLATFORM}. Allowed only 'neonvm-captest-reuse', 'rds-aurora', or 'rds-postgres'" exit 1 ;; esac From 5702e1cb46d2e98c9cce711247b580ddca5b48c8 Mon Sep 17 00:00:00 2001 From: Anton Chaporgin Date: Tue, 30 Jul 2024 16:15:53 +0300 Subject: [PATCH 10/37] [neon/acr] impr: push to ACR while building images (#8545) This tests the ability to push into ACR using OIDC. Proved it worked by running slightly modified YAML. In `promote-images` we push the following images `neon compute-tools {vm-,}compute-node-{v14,v15,v16}` into `neoneastus2`. https://github.com/neondatabase/cloud/issues/14640 --- .github/workflows/build_and_test.yml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 872c1fbb39..3cf40e6153 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -833,6 +833,9 @@ jobs: rm -rf .docker-custom promote-images: + permissions: + contents: read # This is required for actions/checkout + id-token: write # This is required for Azure Login to work. needs: [ check-permissions, tag, test-images, vm-compute-node-image ] runs-on: ubuntu-22.04 @@ -859,6 +862,28 @@ jobs: neondatabase/vm-compute-node-${version}:${{ needs.tag.outputs.build-tag }} done + - name: Azure login + if: github.ref_name == 'main' + uses: azure/login@6c251865b4e6290e7b78be643ea2d005bc51f69a # @v2.1.1 + with: + client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }} + tenant-id: ${{ secrets.AZURE_TENANT_ID }} + subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + + - name: Login to ACR + if: github.ref_name == 'main' + run: | + az acr login --name=neoneastus2 + + - name: Copy docker images to ACR-dev + if: github.ref_name == 'main' + run: | + for image in neon compute-tools {vm-,}compute-node-{v14,v15,v16}; do + docker buildx imagetools create \ + -t neoneastus2.azurecr.io/neondatabase/${image}:${{ needs.tag.outputs.build-tag }} \ + neondatabase/${image}:${{ needs.tag.outputs.build-tag }} + done + - name: Add latest tag to images if: github.ref_name == 'main' run: | From b87a1384f05418f2e994370d77b15a7906c3d999 Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Tue, 30 Jul 2024 09:32:00 -0400 Subject: [PATCH 11/37] feat(storcon): store scrubber metadata scan result (#8480) Part of #8128, followed by #8502. ## Problem Currently we lack mechanism to alert unhealthy `scan_metadata` status if we start running this scrubber command as part of a cronjob. With the storage controller client introduced to storage scrubber in #8196, it is viable to set up alert by storing health status in the storage controller database. We intentionally do not store the full output to the database as the json blobs potentially makes the table really huge. Instead, only a health status and a timestamp recording the last time metadata health status is posted on a tenant shard. Signed-off-by: Yuchen Liang --- Cargo.lock | 2 + libs/pageserver_api/src/controller_api.rs | 38 +++- libs/utils/src/auth.rs | 16 +- storage_controller/Cargo.toml | 9 +- .../down.sql | 1 + .../up.sql | 14 ++ storage_controller/src/http.rs | 73 ++++++- storage_controller/src/persistence.rs | 180 +++++++++++++++++- storage_controller/src/schema.rs | 12 +- storage_controller/src/service.rs | 74 ++++++- test_runner/fixtures/neon_fixtures.py | 46 +++++ .../regress/test_storage_controller.py | 122 +++++++++++- 12 files changed, 560 insertions(+), 27 deletions(-) create mode 100644 storage_controller/migrations/2024-07-23-191537_create_metadata_health/down.sql create mode 100644 storage_controller/migrations/2024-07-23-191537_create_metadata_health/up.sql diff --git a/Cargo.lock b/Cargo.lock index 2b56095bc8..2186d55e9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1672,6 +1672,7 @@ checksum = "62d6dcd069e7b5fe49a302411f759d4cf1cf2c27fe798ef46fb8baefc053dd2b" dependencies = [ "bitflags 2.4.1", "byteorder", + "chrono", "diesel_derives", "itoa", "pq-sys", @@ -5718,6 +5719,7 @@ dependencies = [ "aws-config", "bytes", "camino", + "chrono", "clap", "control_plane", "diesel", diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 474f796040..36b1bd95ff 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -1,5 +1,5 @@ use std::str::FromStr; -use std::time::Instant; +use std::time::{Duration, Instant}; /// Request/response types for the storage controller /// API (`/control/v1` prefix). Implemented by the server @@ -294,6 +294,42 @@ pub enum PlacementPolicy { #[derive(Serialize, Deserialize, Debug)] pub struct TenantShardMigrateResponse {} +/// Metadata health record posted from scrubber. +#[derive(Serialize, Deserialize, Debug)] +pub struct MetadataHealthRecord { + pub tenant_shard_id: TenantShardId, + pub healthy: bool, + pub last_scrubbed_at: chrono::DateTime, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct MetadataHealthUpdateRequest { + pub healthy_tenant_shards: Vec, + pub unhealthy_tenant_shards: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct MetadataHealthUpdateResponse {} + +#[derive(Serialize, Deserialize, Debug)] + +pub struct MetadataHealthListUnhealthyResponse { + pub unhealthy_tenant_shards: Vec, +} + +#[derive(Serialize, Deserialize, Debug)] + +pub struct MetadataHealthListOutdatedRequest { + #[serde(with = "humantime_serde")] + pub not_scrubbed_for: Duration, +} + +#[derive(Serialize, Deserialize, Debug)] + +pub struct MetadataHealthListOutdatedResponse { + pub health_records: Vec, +} + #[cfg(test)] mod test { use super::*; diff --git a/libs/utils/src/auth.rs b/libs/utils/src/auth.rs index a1170a460d..7b735875b7 100644 --- a/libs/utils/src/auth.rs +++ b/libs/utils/src/auth.rs @@ -18,20 +18,20 @@ const STORAGE_TOKEN_ALGORITHM: Algorithm = Algorithm::EdDSA; #[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[serde(rename_all = "lowercase")] pub enum Scope { - // Provides access to all data for a specific tenant (specified in `struct Claims` below) + /// Provides access to all data for a specific tenant (specified in `struct Claims` below) // TODO: join these two? Tenant, - // Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs. - // Should only be used e.g. for status check/tenant creation/list. + /// Provides blanket access to all tenants on the pageserver plus pageserver-wide APIs. + /// Should only be used e.g. for status check/tenant creation/list. PageServerApi, - // Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs. - // Should only be used e.g. for status check. - // Currently also used for connection from any pageserver to any safekeeper. + /// Provides blanket access to all data on the safekeeper plus safekeeper-wide APIs. + /// Should only be used e.g. for status check. + /// Currently also used for connection from any pageserver to any safekeeper. SafekeeperData, - // The scope used by pageservers in upcalls to storage controller and cloud control plane + /// The scope used by pageservers in upcalls to storage controller and cloud control plane #[serde(rename = "generations_api")] GenerationsApi, - // Allows access to control plane managment API and some storage controller endpoints. + /// Allows access to control plane managment API and some storage controller endpoints. Admin, /// Allows access to storage controller APIs used by the scrubber, to interrogate the state diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index b54dea5d47..d14b235046 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -18,6 +18,7 @@ anyhow.workspace = true aws-config.workspace = true bytes.workspace = true camino.workspace = true +chrono.workspace = true clap.workspace = true fail.workspace = true futures.workspace = true @@ -44,7 +45,12 @@ scopeguard.workspace = true strum.workspace = true strum_macros.workspace = true -diesel = { version = "2.1.4", features = ["serde_json", "postgres", "r2d2"] } +diesel = { version = "2.1.4", features = [ + "serde_json", + "postgres", + "r2d2", + "chrono", +] } diesel_migrations = { version = "2.1.0" } r2d2 = { version = "0.8.10" } @@ -52,4 +58,3 @@ utils = { path = "../libs/utils/" } metrics = { path = "../libs/metrics/" } control_plane = { path = "../control_plane" } workspace_hack = { version = "0.1", path = "../workspace_hack" } - diff --git a/storage_controller/migrations/2024-07-23-191537_create_metadata_health/down.sql b/storage_controller/migrations/2024-07-23-191537_create_metadata_health/down.sql new file mode 100644 index 0000000000..1ecfc8786f --- /dev/null +++ b/storage_controller/migrations/2024-07-23-191537_create_metadata_health/down.sql @@ -0,0 +1 @@ +DROP TABLE metadata_health; \ No newline at end of file diff --git a/storage_controller/migrations/2024-07-23-191537_create_metadata_health/up.sql b/storage_controller/migrations/2024-07-23-191537_create_metadata_health/up.sql new file mode 100644 index 0000000000..fa87eda119 --- /dev/null +++ b/storage_controller/migrations/2024-07-23-191537_create_metadata_health/up.sql @@ -0,0 +1,14 @@ +CREATE TABLE metadata_health ( + tenant_id VARCHAR NOT NULL, + shard_number INTEGER NOT NULL, + shard_count INTEGER NOT NULL, + PRIMARY KEY(tenant_id, shard_number, shard_count), + -- Rely on cascade behavior for delete + FOREIGN KEY(tenant_id, shard_number, shard_count) REFERENCES tenant_shards ON DELETE CASCADE, + healthy BOOLEAN NOT NULL DEFAULT TRUE, + last_scrubbed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() +); + + +INSERT INTO metadata_health(tenant_id, shard_number, shard_count) +SELECT tenant_id, shard_number, shard_count FROM tenant_shards; diff --git a/storage_controller/src/http.rs b/storage_controller/src/http.rs index c77918827f..e8513b31eb 100644 --- a/storage_controller/src/http.rs +++ b/storage_controller/src/http.rs @@ -10,7 +10,11 @@ use hyper::header::CONTENT_TYPE; use hyper::{Body, Request, Response}; use hyper::{StatusCode, Uri}; use metrics::{BuildInfo, NeonMetrics}; -use pageserver_api::controller_api::TenantCreateRequest; +use pageserver_api::controller_api::{ + MetadataHealthListOutdatedRequest, MetadataHealthListOutdatedResponse, + MetadataHealthListUnhealthyResponse, MetadataHealthUpdateRequest, MetadataHealthUpdateResponse, + TenantCreateRequest, +}; use pageserver_api::models::{ TenantConfigRequest, TenantLocationConfigRequest, TenantShardSplitRequest, TenantTimeTravelRequest, TimelineCreateRequest, @@ -560,6 +564,51 @@ async fn handle_cancel_node_fill(req: Request) -> Result, A json_response(StatusCode::ACCEPTED, ()) } +async fn handle_metadata_health_update(mut req: Request) -> Result, ApiError> { + check_permissions(&req, Scope::Scrubber)?; + + let update_req = json_request::(&mut req).await?; + let state = get_state(&req); + + state.service.metadata_health_update(update_req).await?; + + json_response(StatusCode::OK, MetadataHealthUpdateResponse {}) +} + +async fn handle_metadata_health_list_unhealthy( + req: Request, +) -> Result, ApiError> { + check_permissions(&req, Scope::Admin)?; + + let state = get_state(&req); + let unhealthy_tenant_shards = state.service.metadata_health_list_unhealthy().await?; + + json_response( + StatusCode::OK, + MetadataHealthListUnhealthyResponse { + unhealthy_tenant_shards, + }, + ) +} + +async fn handle_metadata_health_list_outdated( + mut req: Request, +) -> Result, ApiError> { + check_permissions(&req, Scope::Admin)?; + + let list_outdated_req = json_request::(&mut req).await?; + let state = get_state(&req); + let health_records = state + .service + .metadata_health_list_outdated(list_outdated_req.not_scrubbed_for) + .await?; + + json_response( + StatusCode::OK, + MetadataHealthListOutdatedResponse { health_records }, + ) +} + async fn handle_tenant_shard_split( service: Arc, mut req: Request, @@ -987,6 +1036,28 @@ pub fn make_router( RequestName("control_v1_cancel_node_fill"), ) }) + // Metadata health operations + .post("/control/v1/metadata_health/update", |r| { + named_request_span( + r, + handle_metadata_health_update, + RequestName("control_v1_metadata_health_update"), + ) + }) + .get("/control/v1/metadata_health/unhealthy", |r| { + named_request_span( + r, + handle_metadata_health_list_unhealthy, + RequestName("control_v1_metadata_health_list_unhealthy"), + ) + }) + .post("/control/v1/metadata_health/outdated", |r| { + named_request_span( + r, + handle_metadata_health_list_outdated, + RequestName("control_v1_metadata_health_list_outdated"), + ) + }) // TODO(vlad): endpoint for cancelling drain and fill // Tenant Shard operations .put("/control/v1/tenant/:tenant_shard_id/migrate", |r| { diff --git a/storage_controller/src/persistence.rs b/storage_controller/src/persistence.rs index d8f31e86e5..64a3e597ce 100644 --- a/storage_controller/src/persistence.rs +++ b/storage_controller/src/persistence.rs @@ -8,6 +8,7 @@ use self::split_state::SplitState; use diesel::pg::PgConnection; use diesel::prelude::*; use diesel::Connection; +use pageserver_api::controller_api::MetadataHealthRecord; use pageserver_api::controller_api::ShardSchedulingPolicy; use pageserver_api::controller_api::{NodeSchedulingPolicy, PlacementPolicy}; use pageserver_api::models::TenantConfig; @@ -90,6 +91,10 @@ pub(crate) enum DatabaseOperation { UpdateTenantShard, DeleteTenant, UpdateTenantConfig, + UpdateMetadataHealth, + ListMetadataHealth, + ListMetadataHealthUnhealthy, + ListMetadataHealthOutdated, } #[must_use] @@ -307,15 +312,32 @@ impl Persistence { &self, shards: Vec, ) -> DatabaseResult<()> { - use crate::schema::tenant_shards::dsl::*; + use crate::schema::metadata_health; + use crate::schema::tenant_shards; + + let now = chrono::Utc::now(); + + let metadata_health_records = shards + .iter() + .map(|t| MetadataHealthPersistence { + tenant_id: t.tenant_id.clone(), + shard_number: t.shard_number, + shard_count: t.shard_count, + healthy: true, + last_scrubbed_at: now, + }) + .collect::>(); + self.with_measured_conn( DatabaseOperation::InsertTenantShards, move |conn| -> DatabaseResult<()> { - for tenant in &shards { - diesel::insert_into(tenant_shards) - .values(tenant) - .execute(conn)?; - } + diesel::insert_into(tenant_shards::table) + .values(&shards) + .execute(conn)?; + + diesel::insert_into(metadata_health::table) + .values(&metadata_health_records) + .execute(conn)?; Ok(()) }, ) @@ -329,10 +351,10 @@ impl Persistence { self.with_measured_conn( DatabaseOperation::DeleteTenant, move |conn| -> DatabaseResult<()> { + // `metadata_health` status (if exists) is also deleted based on the cascade behavior. diesel::delete(tenant_shards) .filter(tenant_id.eq(del_tenant_id.to_string())) .execute(conn)?; - Ok(()) }, ) @@ -675,6 +697,94 @@ impl Persistence { ) .await } + + /// Stores all the latest metadata health updates durably. Updates existing entry on conflict. + /// + /// **Correctness:** `metadata_health_updates` should all belong the tenant shards managed by the storage controller. + #[allow(dead_code)] + pub(crate) async fn update_metadata_health_records( + &self, + healthy_records: Vec, + unhealthy_records: Vec, + now: chrono::DateTime, + ) -> DatabaseResult<()> { + use crate::schema::metadata_health::dsl::*; + + self.with_measured_conn( + DatabaseOperation::UpdateMetadataHealth, + move |conn| -> DatabaseResult<_> { + diesel::insert_into(metadata_health) + .values(&healthy_records) + .on_conflict((tenant_id, shard_number, shard_count)) + .do_update() + .set((healthy.eq(true), last_scrubbed_at.eq(now))) + .execute(conn)?; + + diesel::insert_into(metadata_health) + .values(&unhealthy_records) + .on_conflict((tenant_id, shard_number, shard_count)) + .do_update() + .set((healthy.eq(false), last_scrubbed_at.eq(now))) + .execute(conn)?; + Ok(()) + }, + ) + .await + } + + /// Lists all the metadata health records. + #[allow(dead_code)] + pub(crate) async fn list_metadata_health_records( + &self, + ) -> DatabaseResult> { + self.with_measured_conn( + DatabaseOperation::ListMetadataHealth, + move |conn| -> DatabaseResult<_> { + Ok( + crate::schema::metadata_health::table + .load::(conn)?, + ) + }, + ) + .await + } + + /// Lists all the metadata health records that is unhealthy. + #[allow(dead_code)] + pub(crate) async fn list_unhealthy_metadata_health_records( + &self, + ) -> DatabaseResult> { + use crate::schema::metadata_health::dsl::*; + self.with_measured_conn( + DatabaseOperation::ListMetadataHealthUnhealthy, + move |conn| -> DatabaseResult<_> { + Ok(crate::schema::metadata_health::table + .filter(healthy.eq(false)) + .load::(conn)?) + }, + ) + .await + } + + /// Lists all the metadata health records that have not been updated since an `earlier` time. + #[allow(dead_code)] + pub(crate) async fn list_outdated_metadata_health_records( + &self, + earlier: chrono::DateTime, + ) -> DatabaseResult> { + use crate::schema::metadata_health::dsl::*; + + self.with_measured_conn( + DatabaseOperation::ListMetadataHealthOutdated, + move |conn| -> DatabaseResult<_> { + let query = metadata_health.filter(last_scrubbed_at.lt(earlier)); + let res = query.load::(conn)?; + + Ok(res) + }, + ) + .await + } } /// Parts of [`crate::tenant_shard::TenantShard`] that are stored durably @@ -744,3 +854,59 @@ pub(crate) struct NodePersistence { pub(crate) listen_pg_addr: String, pub(crate) listen_pg_port: i32, } + +/// Tenant metadata health status that are stored durably. +#[derive(Queryable, Selectable, Insertable, Serialize, Deserialize, Clone, Eq, PartialEq)] +#[diesel(table_name = crate::schema::metadata_health)] +pub(crate) struct MetadataHealthPersistence { + #[serde(default)] + pub(crate) tenant_id: String, + #[serde(default)] + pub(crate) shard_number: i32, + #[serde(default)] + pub(crate) shard_count: i32, + + pub(crate) healthy: bool, + pub(crate) last_scrubbed_at: chrono::DateTime, +} + +impl MetadataHealthPersistence { + pub fn new( + tenant_shard_id: TenantShardId, + healthy: bool, + last_scrubbed_at: chrono::DateTime, + ) -> Self { + let tenant_id = tenant_shard_id.tenant_id.to_string(); + let shard_number = tenant_shard_id.shard_number.0 as i32; + let shard_count = tenant_shard_id.shard_count.literal() as i32; + + MetadataHealthPersistence { + tenant_id, + shard_number, + shard_count, + healthy, + last_scrubbed_at, + } + } + + #[allow(dead_code)] + pub(crate) fn get_tenant_shard_id(&self) -> Result { + Ok(TenantShardId { + tenant_id: TenantId::from_str(self.tenant_id.as_str())?, + shard_number: ShardNumber(self.shard_number as u8), + shard_count: ShardCount::new(self.shard_count as u8), + }) + } +} + +impl From for MetadataHealthRecord { + fn from(value: MetadataHealthPersistence) -> Self { + MetadataHealthRecord { + tenant_shard_id: value + .get_tenant_shard_id() + .expect("stored tenant id should be valid"), + healthy: value.healthy, + last_scrubbed_at: value.last_scrubbed_at, + } + } +} diff --git a/storage_controller/src/schema.rs b/storage_controller/src/schema.rs index ff37d0fe77..cb5ba3f38b 100644 --- a/storage_controller/src/schema.rs +++ b/storage_controller/src/schema.rs @@ -1,5 +1,15 @@ // @generated automatically by Diesel CLI. +diesel::table! { + metadata_health (tenant_id, shard_number, shard_count) { + tenant_id -> Varchar, + shard_number -> Int4, + shard_count -> Int4, + healthy -> Bool, + last_scrubbed_at -> Timestamptz, + } +} + diesel::table! { nodes (node_id) { node_id -> Int8, @@ -26,4 +36,4 @@ diesel::table! { } } -diesel::allow_tables_to_appear_in_same_query!(nodes, tenant_shards,); +diesel::allow_tables_to_appear_in_same_query!(metadata_health, nodes, tenant_shards,); diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index 821f45d0c0..ea515f67da 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -16,7 +16,7 @@ use crate::{ compute_hook::NotifyError, id_lock_map::{trace_exclusive_lock, trace_shared_lock, IdLockMap, TracingExclusiveGuard}, metrics::LeadershipStatusGroup, - persistence::{AbortShardSplitStatus, TenantFilter}, + persistence::{AbortShardSplitStatus, MetadataHealthPersistence, TenantFilter}, reconciler::{ReconcileError, ReconcileUnits}, scheduler::{MaySchedule, ScheduleContext, ScheduleMode}, tenant_shard::{ @@ -33,11 +33,11 @@ use futures::{stream::FuturesUnordered, StreamExt}; use itertools::Itertools; use pageserver_api::{ controller_api::{ - NodeAvailability, NodeRegisterRequest, NodeSchedulingPolicy, PlacementPolicy, - ShardSchedulingPolicy, TenantCreateRequest, TenantCreateResponse, - TenantCreateResponseShard, TenantDescribeResponse, TenantDescribeResponseShard, - TenantLocateResponse, TenantPolicyRequest, TenantShardMigrateRequest, - TenantShardMigrateResponse, UtilizationScore, + MetadataHealthRecord, MetadataHealthUpdateRequest, NodeAvailability, NodeRegisterRequest, + NodeSchedulingPolicy, PlacementPolicy, ShardSchedulingPolicy, TenantCreateRequest, + TenantCreateResponse, TenantCreateResponseShard, TenantDescribeResponse, + TenantDescribeResponseShard, TenantLocateResponse, TenantPolicyRequest, + TenantShardMigrateRequest, TenantShardMigrateResponse, UtilizationScore, }, models::{SecondaryProgress, TenantConfigRequest, TopTenantShardsRequest}, }; @@ -6095,6 +6095,68 @@ impl Service { Ok(()) } + /// Updates scrubber metadata health check results. + pub(crate) async fn metadata_health_update( + &self, + update_req: MetadataHealthUpdateRequest, + ) -> Result<(), ApiError> { + let now = chrono::offset::Utc::now(); + let (healthy_records, unhealthy_records) = { + let locked = self.inner.read().unwrap(); + let healthy_records = update_req + .healthy_tenant_shards + .into_iter() + // Retain only health records associated with tenant shards managed by storage controller. + .filter(|tenant_shard_id| locked.tenants.contains_key(tenant_shard_id)) + .map(|tenant_shard_id| MetadataHealthPersistence::new(tenant_shard_id, true, now)) + .collect(); + let unhealthy_records = update_req + .unhealthy_tenant_shards + .into_iter() + .filter(|tenant_shard_id| locked.tenants.contains_key(tenant_shard_id)) + .map(|tenant_shard_id| MetadataHealthPersistence::new(tenant_shard_id, false, now)) + .collect(); + + (healthy_records, unhealthy_records) + }; + + self.persistence + .update_metadata_health_records(healthy_records, unhealthy_records, now) + .await?; + Ok(()) + } + + /// Lists the tenant shards that has unhealthy metadata status. + pub(crate) async fn metadata_health_list_unhealthy( + &self, + ) -> Result, ApiError> { + let result = self + .persistence + .list_unhealthy_metadata_health_records() + .await? + .iter() + .map(|p| p.get_tenant_shard_id().unwrap()) + .collect(); + + Ok(result) + } + + /// Lists the tenant shards that have not been scrubbed for some duration. + pub(crate) async fn metadata_health_list_outdated( + &self, + not_scrubbed_for: Duration, + ) -> Result, ApiError> { + let earlier = chrono::offset::Utc::now() - not_scrubbed_for; + let result = self + .persistence + .list_outdated_metadata_health_records(earlier) + .await? + .into_iter() + .map(|record| record.into()) + .collect(); + Ok(result) + } + pub(crate) fn get_leadership_status(&self) -> LeadershipStatus { self.inner.read().unwrap().get_leadership_status() } diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index c5fffc2af6..5b2ebea794 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -449,6 +449,7 @@ class TokenScope(str, Enum): GENERATIONS_API = "generations_api" SAFEKEEPER_DATA = "safekeeperdata" TENANT = "tenant" + SCRUBBER = "scrubber" class NeonEnvBuilder: @@ -2586,6 +2587,51 @@ class NeonStorageController(MetricsGetter, LogUtils): time.sleep(backoff) + def metadata_health_update(self, healthy: List[TenantShardId], unhealthy: List[TenantShardId]): + body: Dict[str, Any] = { + "healthy_tenant_shards": [str(t) for t in healthy], + "unhealthy_tenant_shards": [str(t) for t in unhealthy], + } + + self.request( + "POST", + f"{self.env.storage_controller_api}/control/v1/metadata_health/update", + json=body, + headers=self.headers(TokenScope.SCRUBBER), + ) + + def metadata_health_list_unhealthy(self): + response = self.request( + "GET", + f"{self.env.storage_controller_api}/control/v1/metadata_health/unhealthy", + headers=self.headers(TokenScope.ADMIN), + ) + return response.json() + + def metadata_health_list_outdated(self, duration: str): + body: Dict[str, Any] = {"not_scrubbed_for": duration} + + response = self.request( + "POST", + f"{self.env.storage_controller_api}/control/v1/metadata_health/outdated", + json=body, + headers=self.headers(TokenScope.ADMIN), + ) + return response.json() + + def metadata_health_is_healthy(self, outdated_duration: str = "1h") -> bool: + """Metadata is healthy if there is no unhealthy or outdated health records.""" + + unhealthy = self.metadata_health_list_unhealthy() + outdated = self.metadata_health_list_outdated(outdated_duration) + + healthy = ( + len(unhealthy["unhealthy_tenant_shards"]) == 0 and len(outdated["health_records"]) == 0 + ) + if not healthy: + log.info(f"{unhealthy=}, {outdated=}") + return healthy + def step_down(self): log.info("Asking storage controller to step down") response = self.request( diff --git a/test_runner/regress/test_storage_controller.py b/test_runner/regress/test_storage_controller.py index da638ac233..eb2cdccdb9 100644 --- a/test_runner/regress/test_storage_controller.py +++ b/test_runner/regress/test_storage_controller.py @@ -3,7 +3,7 @@ import threading import time from collections import defaultdict from datetime import datetime, timezone -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import pytest from fixtures.common_types import TenantId, TenantShardId, TimelineId @@ -1785,6 +1785,126 @@ def test_storage_controller_node_deletion( env.storage_controller.consistency_check() +@pytest.mark.parametrize("shard_count", [None, 2]) +def test_storage_controller_metadata_health( + neon_env_builder: NeonEnvBuilder, + shard_count: Optional[int], +): + """ + Create three tenants A, B, C. + + Phase 1: + - A: Post healthy status. + - B: Post unhealthy status. + - C: No updates. + + Phase 2: + - B: Post healthy status. + - C: Post healthy status. + + Phase 3: + - A: Post unhealthy status. + + Phase 4: + - Delete tenant A, metadata health status should be deleted as well. + """ + + def update_and_query_metadata_health( + env: NeonEnv, + healthy: List[TenantShardId], + unhealthy: List[TenantShardId], + outdated_duration: str = "1h", + ) -> Tuple[Set[str], Set[str]]: + """ + Update metadata health. Then list tenant shards with unhealthy and + outdated metadata health status. + """ + if healthy or unhealthy: + env.storage_controller.metadata_health_update(healthy, unhealthy) + result = env.storage_controller.metadata_health_list_unhealthy() + unhealthy_res = set(result["unhealthy_tenant_shards"]) + result = env.storage_controller.metadata_health_list_outdated(outdated_duration) + outdated_res = set(record["tenant_shard_id"] for record in result["health_records"]) + + return unhealthy_res, outdated_res + + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + + neon_env_builder.num_pageservers = 2 + env = neon_env_builder.init_start() + + # Mock tenant (`initial_tenant``) with healthy scrubber scan result + tenant_a_shard_ids = ( + env.storage_controller.tenant_shard_split(env.initial_tenant, shard_count=shard_count) + if shard_count is not None + else [TenantShardId(env.initial_tenant, 0, 0)] + ) + + # Mock tenant with unhealthy scrubber scan result + tenant_b, _ = env.neon_cli.create_tenant(shard_count=shard_count) + tenant_b_shard_ids = ( + env.storage_controller.tenant_shard_split(tenant_b, shard_count=shard_count) + if shard_count is not None + else [TenantShardId(tenant_b, 0, 0)] + ) + + # Mock tenant that never gets a health update from scrubber + tenant_c, _ = env.neon_cli.create_tenant(shard_count=shard_count) + + tenant_c_shard_ids = ( + env.storage_controller.tenant_shard_split(tenant_c, shard_count=shard_count) + if shard_count is not None + else [TenantShardId(tenant_c, 0, 0)] + ) + + # Metadata health table also updated as tenant shards are created. + assert env.storage_controller.metadata_health_is_healthy() + + # post "fake" updates to storage controller db + + unhealthy, outdated = update_and_query_metadata_health( + env, healthy=tenant_a_shard_ids, unhealthy=tenant_b_shard_ids + ) + + log.info(f"After Phase 1: {unhealthy=}, {outdated=}") + assert len(unhealthy) == len(tenant_b_shard_ids) + for t in tenant_b_shard_ids: + assert str(t) in unhealthy + assert len(outdated) == 0 + + unhealthy, outdated = update_and_query_metadata_health( + env, healthy=tenant_b_shard_ids + tenant_c_shard_ids, unhealthy=[] + ) + + log.info(f"After Phase 2: {unhealthy=}, {outdated=}") + assert len(unhealthy) == 0 + assert len(outdated) == 0 + + unhealthy, outdated = update_and_query_metadata_health( + env, healthy=[], unhealthy=tenant_a_shard_ids + ) + + log.info(f"After Phase 3: {unhealthy=}, {outdated=}") + assert len(unhealthy) == len(tenant_a_shard_ids) + for t in tenant_a_shard_ids: + assert str(t) in unhealthy + assert len(outdated) == 0 + + # Phase 4: Delete A + env.storage_controller.pageserver_api().tenant_delete(env.initial_tenant) + + # A's unhealthy metadata health status should be deleted as well. + assert env.storage_controller.metadata_health_is_healthy() + + # All shards from B and C are not fresh if set outdated duration to 0 seconds. + unhealthy, outdated = update_and_query_metadata_health( + env, healthy=[], unhealthy=tenant_a_shard_ids, outdated_duration="0s" + ) + assert len(unhealthy) == 0 + for t in tenant_b_shard_ids + tenant_c_shard_ids: + assert str(t) in outdated + + def test_storage_controller_step_down(neon_env_builder: NeonEnvBuilder): """ Test the `/control/v1/step_down` storage controller API. Upon receiving such From d1d4631c8f0eda39b9558ab542bf04ca7c4c5604 Mon Sep 17 00:00:00 2001 From: Yuchen Liang <70461588+yliang412@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:07:34 -0400 Subject: [PATCH 12/37] feat(scrubber): post `scan_metadata` results to storage controller (#8502) Part of #8128, followup to #8480. closes #8421. Enable scrubber to optionally post metadata scan health results to storage controller. Signed-off-by: Yuchen Liang --- libs/pageserver_api/src/controller_api.rs | 5 ++- storage_scrubber/src/checks.rs | 5 +++ storage_scrubber/src/lib.rs | 7 ++++ storage_scrubber/src/main.rs | 40 ++++++++++++++----- .../src/pageserver_physical_gc.rs | 8 +--- .../src/scan_pageserver_metadata.rs | 32 ++++++++++----- test_runner/fixtures/neon_fixtures.py | 9 +++-- test_runner/regress/test_storage_scrubber.py | 16 +++++++- 8 files changed, 88 insertions(+), 34 deletions(-) diff --git a/libs/pageserver_api/src/controller_api.rs b/libs/pageserver_api/src/controller_api.rs index 36b1bd95ff..a5b452da83 100644 --- a/libs/pageserver_api/src/controller_api.rs +++ b/libs/pageserver_api/src/controller_api.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::str::FromStr; use std::time::{Duration, Instant}; @@ -304,8 +305,8 @@ pub struct MetadataHealthRecord { #[derive(Serialize, Deserialize, Debug)] pub struct MetadataHealthUpdateRequest { - pub healthy_tenant_shards: Vec, - pub unhealthy_tenant_shards: Vec, + pub healthy_tenant_shards: HashSet, + pub unhealthy_tenant_shards: HashSet, } #[derive(Serialize, Deserialize, Debug)] diff --git a/storage_scrubber/src/checks.rs b/storage_scrubber/src/checks.rs index a35a58aedd..5aa9e88c40 100644 --- a/storage_scrubber/src/checks.rs +++ b/storage_scrubber/src/checks.rs @@ -40,6 +40,11 @@ impl TimelineAnalysis { garbage_keys: Vec::new(), } } + + /// Whether a timeline is healthy. + pub(crate) fn is_healthy(&self) -> bool { + self.errors.is_empty() && self.warnings.is_empty() + } } pub(crate) async fn branch_cleanup_and_check_errors( diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index c7900f9b02..e0f154def3 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -32,6 +32,7 @@ use remote_storage::{ }; use reqwest::Url; use serde::{Deserialize, Serialize}; +use storage_controller_client::control_api; use tokio::io::AsyncReadExt; use tokio_util::sync::CancellationToken; use tracing::error; @@ -255,6 +256,12 @@ pub struct ControllerClientConfig { pub controller_jwt: String, } +impl ControllerClientConfig { + pub fn build_client(self) -> control_api::Client { + control_api::Client::new(self.controller_api, Some(self.controller_jwt)) + } +} + pub struct ConsoleConfig { pub token: String, pub base_url: Url, diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index 346829b7c9..4c804c00c1 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -1,7 +1,8 @@ use anyhow::{anyhow, bail}; use camino::Utf8PathBuf; +use pageserver_api::controller_api::{MetadataHealthUpdateRequest, MetadataHealthUpdateResponse}; use pageserver_api::shard::TenantShardId; -use reqwest::Url; +use reqwest::{Method, Url}; use storage_scrubber::garbage::{find_garbage, purge_garbage, PurgeMode}; use storage_scrubber::pageserver_physical_gc::GcMode; use storage_scrubber::scan_pageserver_metadata::scan_metadata; @@ -61,6 +62,8 @@ enum Command { json: bool, #[arg(long = "tenant-id", num_args = 0..)] tenant_ids: Vec, + #[arg(long = "post", default_value_t = false)] + post_to_storage_controller: bool, #[arg(long, default_value = None)] /// For safekeeper node_kind only, points to db with debug dump dump_db_connstr: Option, @@ -116,11 +119,20 @@ async fn main() -> anyhow::Result<()> { chrono::Utc::now().format("%Y_%m_%d__%H_%M_%S") )); + let controller_client_conf = cli.controller_api.map(|controller_api| { + ControllerClientConfig { + controller_api, + // Default to no key: this is a convenience when working in a development environment + controller_jwt: cli.controller_jwt.unwrap_or("".to_owned()), + } + }); + match cli.command { Command::ScanMetadata { json, tenant_ids, node_kind, + post_to_storage_controller, dump_db_connstr, dump_db_table, } => { @@ -159,6 +171,9 @@ async fn main() -> anyhow::Result<()> { } Ok(()) } else { + if controller_client_conf.is_none() && post_to_storage_controller { + return Err(anyhow!("Posting pageserver scan health status to storage controller requires `--controller-api` and `--controller-jwt` to run")); + } match scan_metadata(bucket_config.clone(), tenant_ids).await { Err(e) => { tracing::error!("Failed: {e}"); @@ -170,6 +185,21 @@ async fn main() -> anyhow::Result<()> { } else { println!("{}", summary.summary_string()); } + + if post_to_storage_controller { + if let Some(conf) = controller_client_conf { + let controller_client = conf.build_client(); + let body = summary.build_health_update_request(); + controller_client + .dispatch::( + Method::POST, + "control/v1/metadata_health/update".to_string(), + Some(body), + ) + .await?; + } + } + if summary.is_fatal() { Err(anyhow::anyhow!("Fatal scrub errors detected")) } else if summary.is_empty() { @@ -217,14 +247,6 @@ async fn main() -> anyhow::Result<()> { min_age, mode, } => { - let controller_client_conf = cli.controller_api.map(|controller_api| { - ControllerClientConfig { - controller_api, - // Default to no key: this is a convenience when working in a development environment - controller_jwt: cli.controller_jwt.unwrap_or("".to_owned()), - } - }); - match (&controller_client_conf, mode) { (Some(_), _) => { // Any mode may run when controller API is set diff --git a/storage_scrubber/src/pageserver_physical_gc.rs b/storage_scrubber/src/pageserver_physical_gc.rs index e977fd49f7..69896caa82 100644 --- a/storage_scrubber/src/pageserver_physical_gc.rs +++ b/storage_scrubber/src/pageserver_physical_gc.rs @@ -567,13 +567,7 @@ pub async fn pageserver_physical_gc( } // Execute cross-shard GC, using the accumulator's full view of all the shards built in the per-shard GC - let Some(controller_client) = controller_client_conf.as_ref().map(|c| { - let ControllerClientConfig { - controller_api, - controller_jwt, - } = c; - control_api::Client::new(controller_api.clone(), Some(controller_jwt.clone())) - }) else { + let Some(controller_client) = controller_client_conf.map(|c| c.build_client()) else { tracing::info!("Skipping ancestor layer GC, because no `--controller-api` was specified"); return Ok(summary); }; diff --git a/storage_scrubber/src/scan_pageserver_metadata.rs b/storage_scrubber/src/scan_pageserver_metadata.rs index fbd60f93bb..dc410bde41 100644 --- a/storage_scrubber/src/scan_pageserver_metadata.rs +++ b/storage_scrubber/src/scan_pageserver_metadata.rs @@ -9,12 +9,13 @@ use crate::{init_remote, BucketConfig, NodeKind, RootTarget, TenantShardTimeline use aws_sdk_s3::Client; use futures_util::{StreamExt, TryStreamExt}; use pageserver::tenant::remote_timeline_client::remote_layer_path; +use pageserver_api::controller_api::MetadataHealthUpdateRequest; use pageserver_api::shard::TenantShardId; use serde::Serialize; use utils::id::TenantId; use utils::shard::ShardCount; -#[derive(Serialize)] +#[derive(Serialize, Default)] pub struct MetadataSummary { tenant_count: usize, timeline_count: usize, @@ -23,19 +24,16 @@ pub struct MetadataSummary { with_warnings: HashSet, with_orphans: HashSet, indices_by_version: HashMap, + + #[serde(skip)] + pub(crate) healthy_tenant_shards: HashSet, + #[serde(skip)] + pub(crate) unhealthy_tenant_shards: HashSet, } impl MetadataSummary { fn new() -> Self { - Self { - tenant_count: 0, - timeline_count: 0, - timeline_shard_count: 0, - with_errors: HashSet::new(), - with_warnings: HashSet::new(), - with_orphans: HashSet::new(), - indices_by_version: HashMap::new(), - } + Self::default() } fn update_data(&mut self, data: &S3TimelineBlobData) { @@ -54,6 +52,13 @@ impl MetadataSummary { } fn update_analysis(&mut self, id: &TenantShardTimelineId, analysis: &TimelineAnalysis) { + if analysis.is_healthy() { + self.healthy_tenant_shards.insert(id.tenant_shard_id); + } else { + self.healthy_tenant_shards.remove(&id.tenant_shard_id); + self.unhealthy_tenant_shards.insert(id.tenant_shard_id); + } + if !analysis.errors.is_empty() { self.with_errors.insert(*id); } @@ -101,6 +106,13 @@ Index versions: {version_summary} pub fn is_empty(&self) -> bool { self.timeline_shard_count == 0 } + + pub fn build_health_update_request(&self) -> MetadataHealthUpdateRequest { + MetadataHealthUpdateRequest { + healthy_tenant_shards: self.healthy_tenant_shards.clone(), + unhealthy_tenant_shards: self.unhealthy_tenant_shards.clone(), + } + } } /// Scan the pageserver metadata in an S3 bucket, reporting errors and statistics. diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 5b2ebea794..0c33dec784 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4401,10 +4401,11 @@ class StorageScrubber: assert stdout is not None return stdout - def scan_metadata(self) -> Any: - stdout = self.scrubber_cli( - ["scan-metadata", "--node-kind", "pageserver", "--json"], timeout=30 - ) + def scan_metadata(self, post_to_storage_controller: bool = False) -> Any: + args = ["scan-metadata", "--node-kind", "pageserver", "--json"] + if post_to_storage_controller: + args.append("--post") + stdout = self.scrubber_cli(args, timeout=30) try: return json.loads(stdout) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index a45430ca86..fadf438788 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -440,10 +440,12 @@ def test_scrubber_scan_pageserver_metadata( assert len(index.layer_metadata) > 0 it = iter(index.layer_metadata.items()) - scan_summary = env.storage_scrubber.scan_metadata() + scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True) assert not scan_summary["with_warnings"] assert not scan_summary["with_errors"] + assert env.storage_controller.metadata_health_is_healthy() + # Delete a layer file that is listed in the index. layer, metadata = next(it) log.info(f"Deleting {timeline_path}/{layer.to_str()}") @@ -453,7 +455,17 @@ def test_scrubber_scan_pageserver_metadata( ) log.info(f"delete response: {delete_response}") - # Check scan summary. Expect it to be a L0 layer so only emit warnings. + # Check scan summary without posting to storage controller. Expect it to be a L0 layer so only emit warnings. scan_summary = env.storage_scrubber.scan_metadata() log.info(f"{pprint.pformat(scan_summary)}") assert len(scan_summary["with_warnings"]) > 0 + + assert env.storage_controller.metadata_health_is_healthy() + + # Now post to storage controller, expect seeing one unhealthy health record + scan_summary = env.storage_scrubber.scan_metadata(post_to_storage_controller=True) + log.info(f"{pprint.pformat(scan_summary)}") + assert len(scan_summary["with_warnings"]) > 0 + + unhealthy = env.storage_controller.metadata_health_list_unhealthy()["unhealthy_tenant_shards"] + assert len(unhealthy) == 1 and unhealthy[0] == str(tenant_shard_id) From fb6c1e939049fc80cc0671ebae457e3662db0f5a Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Tue, 30 Jul 2024 18:13:18 +0200 Subject: [PATCH 13/37] cleanup(compact_level0_phase1): some commentary and wrapping into block expressions (#8544) Byproduct of scouting done for https://github.com/neondatabase/neon/issues/8184 refs https://github.com/neondatabase/neon/issues/8184 --- pageserver/src/tenant/timeline.rs | 21 +--- pageserver/src/tenant/timeline/compaction.rs | 126 ++++++++++++------- 2 files changed, 80 insertions(+), 67 deletions(-) diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 862ca42188..2b205db6e1 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -58,7 +58,7 @@ use std::{ sync::atomic::AtomicU64, }; use std::{ - cmp::{max, min, Ordering}, + cmp::{max, min}, ops::ControlFlow, }; use std::{ @@ -177,25 +177,6 @@ impl std::fmt::Display for ImageLayerCreationMode { } } -/// Wrapper for key range to provide reverse ordering by range length for BinaryHeap -#[derive(Debug, Clone, PartialEq, Eq)] -pub(crate) struct Hole { - key_range: Range, - coverage_size: usize, -} - -impl Ord for Hole { - fn cmp(&self, other: &Self) -> Ordering { - other.coverage_size.cmp(&self.coverage_size) // inverse order - } -} - -impl PartialOrd for Hole { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - /// Temporary function for immutable storage state refactor, ensures we are dropping mutex guard instead of other things. /// Can be removed after all refactors are done. fn drop_rlock(rlock: tokio::sync::RwLockReadGuard) { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 497d631f4f..3292b4a121 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -30,8 +30,8 @@ use crate::tenant::config::defaults::{DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPA use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::merge_iterator::MergeIterator; use crate::tenant::storage_layer::{AsLayerDesc, PersistentLayerDesc, ValueReconstructState}; +use crate::tenant::timeline::ImageLayerCreationOutcome; use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter}; -use crate::tenant::timeline::{Hole, ImageLayerCreationOutcome}; use crate::tenant::timeline::{Layer, ResidentLayer}; use crate::tenant::DeltaLayer; use crate::virtual_file::{MaybeFatalIo, VirtualFile}; @@ -608,62 +608,93 @@ impl Timeline { .read_lock_held_spawn_blocking_startup_micros .till_now(); - // Determine N largest holes where N is number of compacted layers. - let max_holes = deltas_to_compact.len(); - let last_record_lsn = self.get_last_record_lsn(); - let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128; - let min_hole_coverage_size = 3; // TODO: something more flexible? - - // min-heap (reserve space for one more element added before eviction) - let mut heap: BinaryHeap = BinaryHeap::with_capacity(max_holes + 1); - let mut prev: Option = None; - - let mut all_keys = Vec::new(); - - for l in deltas_to_compact.iter() { - all_keys.extend(l.load_keys(ctx).await.map_err(CompactionError::Other)?); - } - - // FIXME: should spawn_blocking the rest of this function - - // The current stdlib sorting implementation is designed in a way where it is - // particularly fast where the slice is made up of sorted sub-ranges. - all_keys.sort_by_key(|DeltaEntry { key, lsn, .. }| (*key, *lsn)); + // TODO: replace with streaming k-merge + let all_keys = { + let mut all_keys = Vec::new(); + for l in deltas_to_compact.iter() { + all_keys.extend(l.load_keys(ctx).await.map_err(CompactionError::Other)?); + } + // The current stdlib sorting implementation is designed in a way where it is + // particularly fast where the slice is made up of sorted sub-ranges. + all_keys.sort_by_key(|DeltaEntry { key, lsn, .. }| (*key, *lsn)); + all_keys + }; stats.read_lock_held_key_sort_micros = stats.read_lock_held_prerequisites_micros.till_now(); - for &DeltaEntry { key: next_key, .. } in all_keys.iter() { - if let Some(prev_key) = prev { - // just first fast filter, do not create hole entries for metadata keys. The last hole in the - // compaction is the gap between data key and metadata keys. - if next_key.to_i128() - prev_key.to_i128() >= min_hole_range - && !Key::is_metadata_key(&prev_key) - { - let key_range = prev_key..next_key; - // Measuring hole by just subtraction of i128 representation of key range boundaries - // has not so much sense, because largest holes will corresponds field1/field2 changes. - // But we are mostly interested to eliminate holes which cause generation of excessive image layers. - // That is why it is better to measure size of hole as number of covering image layers. - let coverage_size = layers.image_coverage(&key_range, last_record_lsn).len(); - if coverage_size >= min_hole_coverage_size { - heap.push(Hole { - key_range, - coverage_size, - }); - if heap.len() > max_holes { - heap.pop(); // remove smallest hole + // Determine N largest holes where N is number of compacted layers. The vec is sorted by key range start. + // + // A hole is a key range for which this compaction doesn't have any WAL records. + // Our goal in this compaction iteration is to avoid creating L1s that, in terms of their key range, + // cover the hole, but actually don't contain any WAL records for that key range. + // The reason is that the mere stack of L1s (`count_deltas`) triggers image layer creation (`create_image_layers`). + // That image layer creation would be useless for a hole range covered by L1s that don't contain any WAL records. + // + // The algorithm chooses holes as follows. + // - Slide a 2-window over the keys in key orde to get the hole range (=distance between two keys). + // - Filter: min threshold on range length + // - Rank: by coverage size (=number of image layers required to reconstruct each key in the range for which we have any data) + // + // For more details, intuition, and some ASCII art see https://github.com/neondatabase/neon/pull/3597#discussion_r1112704451 + #[derive(PartialEq, Eq)] + struct Hole { + key_range: Range, + coverage_size: usize, + } + let holes: Vec = { + use std::cmp::Ordering; + impl Ord for Hole { + fn cmp(&self, other: &Self) -> Ordering { + self.coverage_size.cmp(&other.coverage_size).reverse() + } + } + impl PartialOrd for Hole { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } + } + let max_holes = deltas_to_compact.len(); + let last_record_lsn = self.get_last_record_lsn(); + let min_hole_range = (target_file_size / page_cache::PAGE_SZ as u64) as i128; + let min_hole_coverage_size = 3; // TODO: something more flexible? + // min-heap (reserve space for one more element added before eviction) + let mut heap: BinaryHeap = BinaryHeap::with_capacity(max_holes + 1); + let mut prev: Option = None; + + for &DeltaEntry { key: next_key, .. } in all_keys.iter() { + if let Some(prev_key) = prev { + // just first fast filter, do not create hole entries for metadata keys. The last hole in the + // compaction is the gap between data key and metadata keys. + if next_key.to_i128() - prev_key.to_i128() >= min_hole_range + && !Key::is_metadata_key(&prev_key) + { + let key_range = prev_key..next_key; + // Measuring hole by just subtraction of i128 representation of key range boundaries + // has not so much sense, because largest holes will corresponds field1/field2 changes. + // But we are mostly interested to eliminate holes which cause generation of excessive image layers. + // That is why it is better to measure size of hole as number of covering image layers. + let coverage_size = + layers.image_coverage(&key_range, last_record_lsn).len(); + if coverage_size >= min_hole_coverage_size { + heap.push(Hole { + key_range, + coverage_size, + }); + if heap.len() > max_holes { + heap.pop(); // remove smallest hole + } } } } + prev = Some(next_key.next()); } - prev = Some(next_key.next()); - } + let mut holes = heap.into_vec(); + holes.sort_unstable_by_key(|hole| hole.key_range.start); + holes + }; stats.read_lock_held_compute_holes_micros = stats.read_lock_held_key_sort_micros.till_now(); drop_rlock(guard); stats.read_lock_drop_micros = stats.read_lock_held_compute_holes_micros.till_now(); - let mut holes = heap.into_vec(); - holes.sort_unstable_by_key(|hole| hole.key_range.start); - let mut next_hole = 0; // index of next hole in holes vector // This iterator walks through all key-value pairs from all the layers // we're compacting, in key, LSN order. @@ -738,6 +769,7 @@ impl Timeline { let mut key_values_total_size = 0u64; let mut dup_start_lsn: Lsn = Lsn::INVALID; // start LSN of layer containing values of the single key let mut dup_end_lsn: Lsn = Lsn::INVALID; // end LSN of layer containing values of the single key + let mut next_hole = 0; // index of next hole in holes vector for &DeltaEntry { key, lsn, ref val, .. From fa24d27d38aba64c38b7210fe6e81421592ee53a Mon Sep 17 00:00:00 2001 From: Cihan Demirci <128653800+fcdm@users.noreply.github.com> Date: Tue, 30 Jul 2024 22:34:15 +0300 Subject: [PATCH 14/37] cicd: change Azure storage details [1/2] (#8553) Change Azure storage configuration to point to new variables/secrets. They have the `_NEW` suffix in order not to disrupt any tests while we complete the switch. --- .github/actionlint.yml | 1 + .github/workflows/_build-and-test-locally.yml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index 37983798b7..f086008d34 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -9,5 +9,6 @@ self-hosted-runner: - us-east-2 config-variables: - REMOTE_STORAGE_AZURE_CONTAINER + - REMOTE_STORAGE_AZURE_CONTAINER_NEW - REMOTE_STORAGE_AZURE_REGION - SLACK_UPCOMING_RELEASE_CHANNEL_ID diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 26e234a04d..7751f9e8c9 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -219,9 +219,9 @@ jobs: # Run separate tests for real Azure Blob Storage # XXX: replace region with `eu-central-1`-like region export ENABLE_REAL_AZURE_REMOTE_STORAGE=y - export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}" - export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}" - export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}" + export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV_NEW }}" + export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV_NEW }}" + export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER_NEW }}" export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}" ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)' From 35738ca37f4767c56cb5bd7cb132d694a0f238e5 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 31 Jul 2024 14:17:59 +0200 Subject: [PATCH 15/37] compaction_level0_phase1: bypass PS PageCache for data blocks (#8543) part of https://github.com/neondatabase/neon/issues/8184 # Problem We want to bypass PS PageCache for all data block reads, but `compact_level0_phase1` currently uses `ValueRef::load` to load the WAL records from delta layers. Internally, that maps to `FileBlockReader:read_blk` which hits the PageCache [here](https://github.com/neondatabase/neon/blob/e78341e1c220625d9bfa3f08632bd5cfb8e6a876/pageserver/src/tenant/block_io.rs#L229-L236). # Solution This PR adds a mode for `compact_level0_phase1` that uses the `MergeIterator` for reading the `Value`s from the delta layer files. `MergeIterator` is a streaming k-merge that uses vectored blob_io under the hood, which bypasses the PS PageCache for data blocks. Other notable changes: * change the `DiskBtreeReader::into_stream` to buffer the node, instead of holding a `PageCache` `PageReadGuard`. * Without this, we run out of page cache slots in `test_pageserver_compaction_smoke`. * Generally, `PageReadGuard`s aren't supposed to be held across await points, so, this is a general bugfix. # Testing / Validation / Performance `MergeIterator` has not yet been used in production; it's being developed as part of * https://github.com/neondatabase/neon/issues/8002 Therefore, this PR adds a validation mode that compares the existing approach's value iterator with the new approach's stream output, item by item. If they're not identical, we log a warning / fail the unit/regression test. To avoid flooding the logs, we apply a global rate limit of once per 10 seconds. In any case, we use the existing approach's value. Expected performance impact that will be monitored in staging / nightly benchmarks / eventually pre-prod: * with validation: * increased CPU usage * ~doubled VirtualFile read bytes/second metric * no change in disk IO usage because the kernel page cache will likely have the pages buffered on the second read * without validation: * slightly higher DRAM usage because each iterator participating in the k-merge has a dedicated buffer (as opposed to before, where compactions would rely on the PS PageCaceh as a shared evicting buffer) * less disk IO if previously there were repeat PageCache misses (likely case on a busy production Pageserver) * lower CPU usage: PageCache out of the picture, fewer syscalls are made (vectored blob io batches reads) # Rollout The new code is used with validation mode enabled-by-default. This gets us validation everywhere by default, specifically in - Rust unit tests - Python tests - Nightly pagebench (shouldn't really matter) - Staging Before the next release, I'll merge the following aws.git PR that configures prod to continue using the existing behavior: * https://github.com/neondatabase/aws/pull/1663 # Interactions With Other Features This work & rollout should complete before Direct IO is enabled because Direct IO would double the IOPS & latency for each compaction read (#8240). # Future Work The streaming k-merge's memory usage is proportional to the amount of memory per participating layer. But `compact_level0_phase1` still loads all keys into memory for `all_keys_iter`. Thus, it continues to have active memory usage proportional to the number of keys involved in the compaction. Future work should replace `all_keys_iter` with a streaming keys iterator. This PR has a draft in its first commit, which I later reverted because it's not necessary to achieve the goal of this PR / issue #8184. --- pageserver/src/bin/pageserver.rs | 1 + pageserver/src/config.rs | 19 ++ pageserver/src/repository.rs | 3 +- pageserver/src/tenant/disk_btree.rs | 13 +- pageserver/src/tenant/timeline/compaction.rs | 184 ++++++++++++++++++- 5 files changed, 210 insertions(+), 10 deletions(-) diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 7a96c86ded..2d00f311fb 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -129,6 +129,7 @@ fn main() -> anyhow::Result<()> { info!(?conf.virtual_file_io_engine, "starting with virtual_file IO engine"); info!(?conf.get_impl, "starting with get page implementation"); info!(?conf.get_vectored_impl, "starting with vectored get page implementation"); + info!(?conf.compact_level0_phase1_value_access, "starting with setting for compact_level0_phase1_value_access"); let tenants_path = conf.tenants_path(); if !tenants_path.exists() { diff --git a/pageserver/src/config.rs b/pageserver/src/config.rs index f71881683d..41c2fe0af3 100644 --- a/pageserver/src/config.rs +++ b/pageserver/src/config.rs @@ -29,6 +29,7 @@ use utils::{ logging::LogFormat, }; +use crate::tenant::timeline::compaction::CompactL0Phase1ValueAccess; use crate::tenant::vectored_blob_io::MaxVectoredReadBytes; use crate::tenant::{config::TenantConfOpt, timeline::GetImpl}; use crate::tenant::{TENANTS_SEGMENT_NAME, TIMELINES_SEGMENT_NAME}; @@ -295,6 +296,10 @@ pub struct PageServerConf { pub ephemeral_bytes_per_memory_kb: usize, pub l0_flush: L0FlushConfig, + + /// This flag is temporary and will be removed after gradual rollout. + /// See . + pub compact_level0_phase1_value_access: CompactL0Phase1ValueAccess, } /// We do not want to store this in a PageServerConf because the latter may be logged @@ -401,6 +406,8 @@ struct PageServerConfigBuilder { ephemeral_bytes_per_memory_kb: BuilderValue, l0_flush: BuilderValue, + + compact_level0_phase1_value_access: BuilderValue, } impl PageServerConfigBuilder { @@ -490,6 +497,7 @@ impl PageServerConfigBuilder { validate_vectored_get: Set(DEFAULT_VALIDATE_VECTORED_GET), ephemeral_bytes_per_memory_kb: Set(DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB), l0_flush: Set(L0FlushConfig::default()), + compact_level0_phase1_value_access: Set(CompactL0Phase1ValueAccess::default()), } } } @@ -673,6 +681,10 @@ impl PageServerConfigBuilder { self.l0_flush = BuilderValue::Set(value); } + pub fn compact_level0_phase1_value_access(&mut self, value: CompactL0Phase1ValueAccess) { + self.compact_level0_phase1_value_access = BuilderValue::Set(value); + } + pub fn build(self, id: NodeId) -> anyhow::Result { let default = Self::default_values(); @@ -730,6 +742,7 @@ impl PageServerConfigBuilder { image_compression, ephemeral_bytes_per_memory_kb, l0_flush, + compact_level0_phase1_value_access, } CUSTOM LOGIC { @@ -1002,6 +1015,9 @@ impl PageServerConf { "l0_flush" => { builder.l0_flush(utils::toml_edit_ext::deserialize_item(item).context("l0_flush")?) } + "compact_level0_phase1_value_access" => { + builder.compact_level0_phase1_value_access(utils::toml_edit_ext::deserialize_item(item).context("compact_level0_phase1_value_access")?) + } _ => bail!("unrecognized pageserver option '{key}'"), } } @@ -1086,6 +1102,7 @@ impl PageServerConf { validate_vectored_get: defaults::DEFAULT_VALIDATE_VECTORED_GET, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), + compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), } } } @@ -1327,6 +1344,7 @@ background_task_maximum_delay = '334 s' image_compression: defaults::DEFAULT_IMAGE_COMPRESSION, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), + compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), }, "Correct defaults should be used when no config values are provided" ); @@ -1401,6 +1419,7 @@ background_task_maximum_delay = '334 s' image_compression: defaults::DEFAULT_IMAGE_COMPRESSION, ephemeral_bytes_per_memory_kb: defaults::DEFAULT_EPHEMERAL_BYTES_PER_MEMORY_KB, l0_flush: L0FlushConfig::default(), + compact_level0_phase1_value_access: CompactL0Phase1ValueAccess::default(), }, "Should be able to parse all basic config values correctly" ); diff --git a/pageserver/src/repository.rs b/pageserver/src/repository.rs index 5a334d0290..e4ebafd927 100644 --- a/pageserver/src/repository.rs +++ b/pageserver/src/repository.rs @@ -8,8 +8,7 @@ use std::time::Duration; pub use pageserver_api::key::{Key, KEY_SIZE}; /// A 'value' stored for a one Key. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[cfg_attr(test, derive(PartialEq))] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum Value { /// An Image value contains a full copy of the value Image(Bytes), diff --git a/pageserver/src/tenant/disk_btree.rs b/pageserver/src/tenant/disk_btree.rs index 1583a3826a..0107b0ac7e 100644 --- a/pageserver/src/tenant/disk_btree.rs +++ b/pageserver/src/tenant/disk_btree.rs @@ -296,13 +296,19 @@ where let mut stack = Vec::new(); stack.push((self.root_blk, None)); let block_cursor = self.reader.block_cursor(); + let mut node_buf = [0_u8; PAGE_SZ]; while let Some((node_blknum, opt_iter)) = stack.pop() { - // Locate the node. - let node_buf = block_cursor + // Read the node, through the PS PageCache, into local variable `node_buf`. + // We could keep the page cache read guard alive, but, at the time of writing, + // we run quite small PS PageCache s => can't risk running out of + // PageCache space because this stream isn't consumed fast enough. + let page_read_guard = block_cursor .read_blk(self.start_blk + node_blknum, ctx) .await?; + node_buf.copy_from_slice(page_read_guard.as_ref()); + drop(page_read_guard); // drop page cache read guard early - let node = OnDiskNode::deparse(node_buf.as_ref())?; + let node = OnDiskNode::deparse(&node_buf)?; let prefix_len = node.prefix_len as usize; let suffix_len = node.suffix_len as usize; @@ -345,6 +351,7 @@ where Either::Left(idx..node.num_children.into()) }; + // idx points to the first match now. Keep going from there while let Some(idx) = iter.next() { let key_off = idx * suffix_len; diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 3292b4a121..7bfa8e9d35 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -698,7 +698,140 @@ impl Timeline { // This iterator walks through all key-value pairs from all the layers // we're compacting, in key, LSN order. - let all_values_iter = all_keys.iter(); + // If there's both a Value::Image and Value::WalRecord for the same (key,lsn), + // then the Value::Image is ordered before Value::WalRecord. + // + // TODO(https://github.com/neondatabase/neon/issues/8184): remove the page cached blob_io + // option and validation code once we've reached confidence. + enum AllValuesIter<'a> { + PageCachedBlobIo { + all_keys_iter: VecIter<'a>, + }, + StreamingKmergeBypassingPageCache { + merge_iter: MergeIterator<'a>, + }, + ValidatingStreamingKmergeBypassingPageCache { + mode: CompactL0BypassPageCacheValidation, + merge_iter: MergeIterator<'a>, + all_keys_iter: VecIter<'a>, + }, + } + type VecIter<'a> = std::slice::Iter<'a, DeltaEntry<'a>>; // TODO: distinguished lifetimes + impl AllValuesIter<'_> { + async fn next_all_keys_iter( + iter: &mut VecIter<'_>, + ctx: &RequestContext, + ) -> anyhow::Result> { + let Some(DeltaEntry { + key, + lsn, + val: value_ref, + .. + }) = iter.next() + else { + return Ok(None); + }; + let value = value_ref.load(ctx).await?; + Ok(Some((*key, *lsn, value))) + } + async fn next( + &mut self, + ctx: &RequestContext, + ) -> anyhow::Result> { + match self { + AllValuesIter::PageCachedBlobIo { all_keys_iter: iter } => { + Self::next_all_keys_iter(iter, ctx).await + } + AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter } => merge_iter.next().await, + AllValuesIter::ValidatingStreamingKmergeBypassingPageCache { mode, merge_iter, all_keys_iter } => async { + // advance both iterators + let all_keys_iter_item = Self::next_all_keys_iter(all_keys_iter, ctx).await; + let merge_iter_item = merge_iter.next().await; + // compare results & log warnings as needed + macro_rules! rate_limited_warn { + ($($arg:tt)*) => {{ + if cfg!(debug_assertions) || cfg!(feature = "testing") { + warn!($($arg)*); + panic!("CompactL0BypassPageCacheValidation failure, check logs"); + } + use once_cell::sync::Lazy; + use utils::rate_limit::RateLimit; + use std::sync::Mutex; + use std::time::Duration; + static LOGGED: Lazy> = + Lazy::new(|| Mutex::new(RateLimit::new(Duration::from_secs(10)))); + let mut rate_limit = LOGGED.lock().unwrap(); + rate_limit.call(|| { + warn!($($arg)*); + }); + }} + } + match (&all_keys_iter_item, &merge_iter_item) { + (Err(_), Err(_)) => { + // don't bother asserting equivality of the errors + } + (Err(all_keys), Ok(merge)) => { + rate_limited_warn!(?merge, "all_keys_iter returned an error where merge did not: {all_keys:?}"); + }, + (Ok(all_keys), Err(merge)) => { + rate_limited_warn!(?all_keys, "merge returned an error where all_keys_iter did not: {merge:?}"); + }, + (Ok(None), Ok(None)) => { } + (Ok(Some(all_keys)), Ok(None)) => { + rate_limited_warn!(?all_keys, "merge returned None where all_keys_iter returned Some"); + } + (Ok(None), Ok(Some(merge))) => { + rate_limited_warn!(?merge, "all_keys_iter returned None where merge returned Some"); + } + (Ok(Some((all_keys_key, all_keys_lsn, all_keys_value))), Ok(Some((merge_key, merge_lsn, merge_value)))) => { + match mode { + // TODO: in this mode, we still load the value from disk for both iterators, even though we only need the all_keys_iter one + CompactL0BypassPageCacheValidation::KeyLsn => { + let all_keys = (all_keys_key, all_keys_lsn); + let merge = (merge_key, merge_lsn); + if all_keys != merge { + rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN) than all_keys_iter"); + } + } + CompactL0BypassPageCacheValidation::KeyLsnValue => { + let all_keys = (all_keys_key, all_keys_lsn, all_keys_value); + let merge = (merge_key, merge_lsn, merge_value); + if all_keys != merge { + rate_limited_warn!(?all_keys, ?merge, "merge returned a different (Key,LSN,Value) than all_keys_iter"); + } + } + } + } + } + // in case of mismatch, trust the legacy all_keys_iter_item + all_keys_iter_item + }.instrument(info_span!("next")).await + } + } + } + let mut all_values_iter = match &self.conf.compact_level0_phase1_value_access { + CompactL0Phase1ValueAccess::PageCachedBlobIo => AllValuesIter::PageCachedBlobIo { + all_keys_iter: all_keys.iter(), + }, + CompactL0Phase1ValueAccess::StreamingKmerge { validate } => { + let merge_iter = { + let mut deltas = Vec::with_capacity(deltas_to_compact.len()); + for l in deltas_to_compact.iter() { + let l = l.get_as_delta(ctx).await.map_err(CompactionError::Other)?; + deltas.push(l); + } + MergeIterator::create(&deltas, &[], ctx) + }; + match validate { + None => AllValuesIter::StreamingKmergeBypassingPageCache { merge_iter }, + Some(validate) => AllValuesIter::ValidatingStreamingKmergeBypassingPageCache { + mode: validate.clone(), + merge_iter, + all_keys_iter: all_keys.iter(), + }, + } + } + }; // This iterator walks through all keys and is needed to calculate size used by each key let mut all_keys_iter = all_keys @@ -771,11 +904,11 @@ impl Timeline { let mut dup_end_lsn: Lsn = Lsn::INVALID; // end LSN of layer containing values of the single key let mut next_hole = 0; // index of next hole in holes vector - for &DeltaEntry { - key, lsn, ref val, .. - } in all_values_iter + while let Some((key, lsn, value)) = all_values_iter + .next(ctx) + .await + .map_err(CompactionError::Other)? { - let value = val.load(ctx).await.map_err(CompactionError::Other)?; let same_key = prev_key.map_or(false, |prev_key| prev_key == key); // We need to check key boundaries once we reach next key or end of layer with the same key if !same_key || lsn == dup_end_lsn { @@ -960,6 +1093,10 @@ impl Timeline { } } + // Without this, rustc complains about deltas_to_compact still + // being borrowed when we `.into_iter()` below. + drop(all_values_iter); + Ok(CompactLevel0Phase1Result { new_layers, deltas_to_compact: deltas_to_compact @@ -1067,6 +1204,43 @@ impl TryFrom for CompactLevel0Phase1Stats { } } +#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] +#[serde(tag = "mode", rename_all = "kebab-case", deny_unknown_fields)] +pub enum CompactL0Phase1ValueAccess { + /// The old way. + PageCachedBlobIo, + /// The new way. + StreamingKmerge { + /// If set, we run both the old way and the new way, validate that + /// they are identical (=> [`CompactL0BypassPageCacheValidation`]), + /// and if the validation fails, + /// - in tests: fail them with a panic or + /// - in prod, log a rate-limited warning and use the old way's results. + /// + /// If not set, we only run the new way and trust its results. + validate: Option, + }, +} + +/// See [`CompactL0Phase1ValueAccess::StreamingKmerge`]. +#[derive(Debug, PartialEq, Eq, Clone, serde::Deserialize, serde::Serialize)] +#[serde(rename_all = "kebab-case")] +pub enum CompactL0BypassPageCacheValidation { + /// Validate that the series of (key, lsn) pairs are the same. + KeyLsn, + /// Validate that the entire output of old and new way is identical. + KeyLsnValue, +} + +impl Default for CompactL0Phase1ValueAccess { + fn default() -> Self { + CompactL0Phase1ValueAccess::StreamingKmerge { + // TODO(https://github.com/neondatabase/neon/issues/8184): change to None once confident + validate: Some(CompactL0BypassPageCacheValidation::KeyLsnValue), + } + } +} + impl Timeline { /// Entry point for new tiered compaction algorithm. /// From 0356fc426b347cd8a4ec3c21bce88922f9fb78ea Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 31 Jul 2024 15:10:27 +0100 Subject: [PATCH 16/37] CI(regress-tests): run less regression tests (#8561) ## Problem We run regression tests on `release` & `debug` builds for each of the three supported Postgres versions (6 in total). With upcoming ARM support and Postgres 17, the number of jobs will jump to 16, which is a lot. See the internal discussion here: https://neondb.slack.com/archives/C033A2WE6BZ/p1722365908404329 ## Summary of changes - Run `regress-tests` job in debug builds only with the latest Postgres version - Do not do `debug` builds on release branches --- .github/workflows/_build-and-test-locally.yml | 8 ++++++-- .github/workflows/build_and_test.yml | 5 ++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 7751f9e8c9..182e96a8ca 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -19,6 +19,10 @@ on: description: 'debug or release' required: true type: string + pg-versions: + description: 'a json array of postgres versions to run regression tests on' + required: true + type: string defaults: run: @@ -254,7 +258,7 @@ jobs: strategy: fail-fast: false matrix: - pg_version: [ v14, v15, v16 ] + pg_version: ${{ fromJson(inputs.pg-versions) }} steps: - uses: actions/checkout@v4 with: @@ -284,5 +288,5 @@ jobs: - name: Merge and upload coverage data if: | false && - inputs.build-type == 'debug' && matrix.pg_version == 'v14' + inputs.build-type == 'debug' && matrix.pg_version == 'v16' uses: ./.github/actions/save-coverage-data diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3cf40e6153..c4df98f585 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -203,7 +203,8 @@ jobs: fail-fast: false matrix: arch: [ x64 ] - build-type: [ debug, release ] + # Do not build or run tests in debug for release branches + build-type: ${{ fromJson((startsWith(github.ref_name, 'release' && github.event_name == 'push')) && '["release"]' || '["debug", "release"]') }} include: - build-type: release arch: arm64 @@ -213,6 +214,8 @@ jobs: build-tools-image: ${{ needs.build-build-tools-image.outputs.image }} build-tag: ${{ needs.tag.outputs.build-tag }} build-type: ${{ matrix.build-type }} + # Run tests on all Postgres versions in release builds and only on the latest version in debug builds + pg-versions: ${{ matrix.build-type == 'release' && '["v14", "v15", "v16"]' || '["v16"]' }} secrets: inherit # Keep `benchmarks` job outside of `build-and-test-locally` workflow to make job failures non-blocking From 311cc71b086bd01e79741106cf945439ded7e22d Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Wed, 31 Jul 2024 10:48:48 -0400 Subject: [PATCH 17/37] feat(pageserver): support btm-gc-compaction for child branches (#8519) part of https://github.com/neondatabase/neon/issues/8002 For child branches, we will pull the image of the modified keys from the parant into the child branch, which creates a full history for generating key retention. If there are not enough delta keys, the image won't be wrote eventually, and we will only keep the deltas inside the child branch. We could avoid the wasteful work to pull the image from the parent if we can know the number of deltas in advance, in the future (currently we always pull image for all modified keys in the child branch) --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant.rs | 293 ++++++++++++++++++- pageserver/src/tenant/timeline.rs | 7 +- pageserver/src/tenant/timeline/compaction.rs | 135 ++++++--- 3 files changed, 400 insertions(+), 35 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index e5ac6725ad..48c1851a3a 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -7347,6 +7347,7 @@ mod tests { Lsn(0x60), &[Lsn(0x20), Lsn(0x40), Lsn(0x50)], 3, + None, ) .await .unwrap(); @@ -7471,7 +7472,7 @@ mod tests { ), ]; let res = tline - .generate_key_retention(key, &history, Lsn(0x60), &[Lsn(0x40), Lsn(0x50)], 3) + .generate_key_retention(key, &history, Lsn(0x60), &[Lsn(0x40), Lsn(0x50)], 3, None) .await .unwrap(); let expected_res = KeyHistoryRetention { @@ -7517,6 +7518,114 @@ mod tests { }; assert_eq!(res, expected_res); + // In case of branch compaction, the branch itself does not have the full history, and we need to provide + // the ancestor image in the test case. + + let history = vec![ + ( + key, + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append(";0x20")), + ), + ( + key, + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append(";0x30")), + ), + ( + key, + Lsn(0x40), + Value::WalRecord(NeonWalRecord::wal_append(";0x40")), + ), + ( + key, + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + ), + ]; + let res = tline + .generate_key_retention( + key, + &history, + Lsn(0x60), + &[], + 3, + Some((key, Lsn(0x10), Bytes::copy_from_slice(b"0x10"))), + ) + .await + .unwrap(); + let expected_res = KeyHistoryRetention { + below_horizon: vec![( + Lsn(0x60), + KeyLogAtLsn(vec![( + Lsn(0x60), + Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x30;0x40")), // use the ancestor image to reconstruct the page + )]), + )], + above_horizon: KeyLogAtLsn(vec![( + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + )]), + }; + assert_eq!(res, expected_res); + + let history = vec![ + ( + key, + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append(";0x20")), + ), + ( + key, + Lsn(0x40), + Value::WalRecord(NeonWalRecord::wal_append(";0x40")), + ), + ( + key, + Lsn(0x60), + Value::WalRecord(NeonWalRecord::wal_append(";0x60")), + ), + ( + key, + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + ), + ]; + let res = tline + .generate_key_retention( + key, + &history, + Lsn(0x60), + &[Lsn(0x30)], + 3, + Some((key, Lsn(0x10), Bytes::copy_from_slice(b"0x10"))), + ) + .await + .unwrap(); + let expected_res = KeyHistoryRetention { + below_horizon: vec![ + ( + Lsn(0x30), + KeyLogAtLsn(vec![( + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append(";0x20")), + )]), + ), + ( + Lsn(0x60), + KeyLogAtLsn(vec![( + Lsn(0x60), + Value::Image(Bytes::copy_from_slice(b"0x10;0x20;0x40;0x60")), + )]), + ), + ], + above_horizon: KeyLogAtLsn(vec![( + Lsn(0x70), + Value::WalRecord(NeonWalRecord::wal_append(";0x70")), + )]), + }; + assert_eq!(res, expected_res); + Ok(()) } @@ -7715,4 +7824,186 @@ mod tests { Ok(()) } + + #[tokio::test] + async fn test_simple_bottom_most_compaction_on_branch() -> anyhow::Result<()> { + let harness = TenantHarness::create("test_simple_bottom_most_compaction_on_branch").await?; + let (tenant, ctx) = harness.load().await; + + fn get_key(id: u32) -> Key { + let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + let img_layer = (0..10) + .map(|id| (get_key(id), Bytes::from(format!("value {id}@0x10")))) + .collect_vec(); + + let delta1 = vec![ + ( + get_key(1), + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append("@0x20")), + ), + ( + get_key(2), + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append("@0x30")), + ), + ( + get_key(3), + Lsn(0x28), + Value::WalRecord(NeonWalRecord::wal_append("@0x28")), + ), + ( + get_key(3), + Lsn(0x30), + Value::WalRecord(NeonWalRecord::wal_append("@0x30")), + ), + ( + get_key(3), + Lsn(0x40), + Value::WalRecord(NeonWalRecord::wal_append("@0x40")), + ), + ]; + let delta2 = vec![ + ( + get_key(5), + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append("@0x20")), + ), + ( + get_key(6), + Lsn(0x20), + Value::WalRecord(NeonWalRecord::wal_append("@0x20")), + ), + ]; + let delta3 = vec![ + ( + get_key(8), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ( + get_key(9), + Lsn(0x48), + Value::WalRecord(NeonWalRecord::wal_append("@0x48")), + ), + ]; + + let parent_tline = tenant + .create_test_timeline_with_layers( + TIMELINE_ID, + Lsn(0x10), + DEFAULT_PG_VERSION, + &ctx, + vec![], // delta layers + vec![(Lsn(0x18), img_layer)], // image layers + Lsn(0x18), + ) + .await?; + + parent_tline.add_extra_test_dense_keyspace(KeySpace::single(get_key(0)..get_key(10))); + + let branch_tline = tenant + .branch_timeline_test_with_layers( + &parent_tline, + NEW_TIMELINE_ID, + Some(Lsn(0x18)), + &ctx, + vec![ + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta1), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x20)..Lsn(0x48), delta2), + DeltaLayerTestDesc::new_with_inferred_key_range(Lsn(0x48)..Lsn(0x50), delta3), + ], // delta layers + vec![], // image layers + Lsn(0x50), + ) + .await?; + + branch_tline.add_extra_test_dense_keyspace(KeySpace::single(get_key(0)..get_key(10))); + + { + // Update GC info + let mut guard = parent_tline.gc_info.write().unwrap(); + *guard = GcInfo { + retain_lsns: vec![(Lsn(0x18), branch_tline.timeline_id)], + cutoffs: GcCutoffs { + time: Lsn(0x10), + space: Lsn(0x10), + }, + leases: Default::default(), + within_ancestor_pitr: false, + }; + } + + { + // Update GC info + let mut guard = branch_tline.gc_info.write().unwrap(); + *guard = GcInfo { + retain_lsns: vec![(Lsn(0x40), branch_tline.timeline_id)], + cutoffs: GcCutoffs { + time: Lsn(0x50), + space: Lsn(0x50), + }, + leases: Default::default(), + within_ancestor_pitr: false, + }; + } + + let expected_result_at_gc_horizon = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20"), + Bytes::from_static(b"value 2@0x10@0x30"), + Bytes::from_static(b"value 3@0x10@0x28@0x30@0x40"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10@0x20"), + Bytes::from_static(b"value 6@0x10@0x20"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10@0x48"), + Bytes::from_static(b"value 9@0x10@0x48"), + ]; + + let expected_result_at_lsn_40 = [ + Bytes::from_static(b"value 0@0x10"), + Bytes::from_static(b"value 1@0x10@0x20"), + Bytes::from_static(b"value 2@0x10@0x30"), + Bytes::from_static(b"value 3@0x10@0x28@0x30@0x40"), + Bytes::from_static(b"value 4@0x10"), + Bytes::from_static(b"value 5@0x10@0x20"), + Bytes::from_static(b"value 6@0x10@0x20"), + Bytes::from_static(b"value 7@0x10"), + Bytes::from_static(b"value 8@0x10"), + Bytes::from_static(b"value 9@0x10"), + ]; + + let verify_result = || async { + for idx in 0..10 { + assert_eq!( + branch_tline + .get(get_key(idx as u32), Lsn(0x50), &ctx) + .await + .unwrap(), + &expected_result_at_gc_horizon[idx] + ); + assert_eq!( + branch_tline + .get(get_key(idx as u32), Lsn(0x40), &ctx) + .await + .unwrap(), + &expected_result_at_lsn_40[idx] + ); + } + }; + + verify_result().await; + + let cancel = CancellationToken::new(); + branch_tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + + verify_result().await; + + Ok(()) + } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 2b205db6e1..4db44a3a19 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -529,7 +529,6 @@ impl GetVectoredError { } } -#[derive(Debug)] pub struct MissingKeyError { key: Key, shard: ShardNumber, @@ -540,6 +539,12 @@ pub struct MissingKeyError { backtrace: Option, } +impl std::fmt::Debug for MissingKeyError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + impl std::fmt::Display for MissingKeyError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!( diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 7bfa8e9d35..5e9ff1c9e4 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -15,6 +15,7 @@ use super::{ }; use anyhow::{anyhow, Context}; +use bytes::Bytes; use enumset::EnumSet; use fail::fail_point; use itertools::Itertools; @@ -69,17 +70,21 @@ impl KeyHistoryRetention { self, key: Key, delta_writer: &mut Vec<(Key, Lsn, Value)>, - image_writer: &mut ImageLayerWriter, + mut image_writer: Option<&mut ImageLayerWriter>, ctx: &RequestContext, ) -> anyhow::Result<()> { let mut first_batch = true; - for (_, KeyLogAtLsn(logs)) in self.below_horizon { + for (cutoff_lsn, KeyLogAtLsn(logs)) in self.below_horizon { if first_batch { if logs.len() == 1 && logs[0].1.is_image() { let Value::Image(img) = &logs[0].1 else { unreachable!() }; - image_writer.put_image(key, img.clone(), ctx).await?; + if let Some(image_writer) = image_writer.as_mut() { + image_writer.put_image(key, img.clone(), ctx).await?; + } else { + delta_writer.push((key, cutoff_lsn, Value::Image(img.clone()))); + } } else { for (lsn, val) in logs { delta_writer.push((key, lsn, val)); @@ -1328,6 +1333,7 @@ impl Timeline { horizon: Lsn, retain_lsn_below_horizon: &[Lsn], delta_threshold_cnt: usize, + base_img_from_ancestor: Option<(Key, Lsn, Bytes)>, ) -> anyhow::Result { // Pre-checks for the invariants if cfg!(debug_assertions) { @@ -1357,6 +1363,7 @@ impl Timeline { ); } } + let has_ancestor = base_img_from_ancestor.is_some(); // Step 1: split history into len(retain_lsn_below_horizon) + 2 buckets, where the last bucket is for all deltas above the horizon, // and the second-to-last bucket is for the horizon. Each bucket contains lsn_last_bucket < deltas <= lsn_this_bucket. let (mut split_history, lsn_split_points) = { @@ -1390,6 +1397,9 @@ impl Timeline { // For example, we have delta layer key1@0x10, key1@0x20, and image layer key1@0x10, we will // keep the image for key1@0x10 and the delta for key1@0x20. key1@0x10 delta will be simply // dropped. + // + // TODO: in case we have both delta + images for a given LSN and it does not exceed the delta + // threshold, we could have kept delta instead to save space. This is an optimization for the future. continue; } } @@ -1407,9 +1417,13 @@ impl Timeline { "should have at least below + above horizon batches" ); let mut replay_history: Vec<(Key, Lsn, Value)> = Vec::new(); + if let Some((key, lsn, img)) = base_img_from_ancestor { + replay_history.push((key, lsn, Value::Image(img))); + } for (i, split_for_lsn) in split_history.into_iter().enumerate() { + // TODO: there could be image keys inside the splits, and we can compute records_since_last_image accordingly. records_since_last_image += split_for_lsn.len(); - let generate_image = if i == 0 { + let generate_image = if i == 0 && !has_ancestor { // We always generate images for the first batch (below horizon / lowest retain_lsn) true } else if i == batch_cnt - 1 { @@ -1532,20 +1546,25 @@ impl Timeline { retain_lsns_below_horizon.sort(); (selected_layers, gc_cutoff, retain_lsns_below_horizon) }; - let lowest_retain_lsn = retain_lsns_below_horizon - .first() - .copied() - .unwrap_or(gc_cutoff); - if cfg!(debug_assertions) { - assert_eq!( - lowest_retain_lsn, - retain_lsns_below_horizon - .iter() - .min() - .copied() - .unwrap_or(gc_cutoff) - ); - } + let lowest_retain_lsn = if self.ancestor_timeline.is_some() { + Lsn(self.ancestor_lsn.0 + 1) + } else { + let res = retain_lsns_below_horizon + .first() + .copied() + .unwrap_or(gc_cutoff); + if cfg!(debug_assertions) { + assert_eq!( + res, + retain_lsns_below_horizon + .iter() + .min() + .copied() + .unwrap_or(gc_cutoff) + ); + } + res + }; info!( "picked {} layers for compaction with gc_cutoff={} lowest_retain_lsn={}", layer_selection.len(), @@ -1586,6 +1605,7 @@ impl Timeline { let mut accumulated_values = Vec::new(); let mut last_key: Option = None; + #[allow(clippy::too_many_arguments)] async fn flush_deltas( deltas: &mut Vec<(Key, Lsn, crate::repository::Value)>, last_key: Key, @@ -1594,6 +1614,7 @@ impl Timeline { tline: &Arc, lowest_retain_lsn: Lsn, ctx: &RequestContext, + last_batch: bool, ) -> anyhow::Result> { // Check if we need to split the delta layer. We split at the original delta layer boundary to avoid // overlapping layers. @@ -1614,7 +1635,7 @@ impl Timeline { *current_delta_split_point += 1; need_split = true; } - if !need_split { + if !need_split && !last_batch { return Ok(None); } let deltas = std::mem::take(deltas); @@ -1639,15 +1660,44 @@ impl Timeline { Ok(Some(delta_layer)) } - let mut image_layer_writer = ImageLayerWriter::new( - self.conf, - self.timeline_id, - self.tenant_shard_id, - &(Key::MIN..Key::MAX), // covers the full key range - lowest_retain_lsn, - ctx, - ) - .await?; + // Only create image layers when there is no ancestor branches. TODO: create covering image layer + // when some condition meet. + let mut image_layer_writer = if self.ancestor_timeline.is_none() { + Some( + ImageLayerWriter::new( + self.conf, + self.timeline_id, + self.tenant_shard_id, + &(Key::MIN..Key::MAX), // covers the full key range + lowest_retain_lsn, + ctx, + ) + .await?, + ) + } else { + None + }; + + /// Returns None if there is no ancestor branch. Throw an error when the key is not found. + /// + /// Currently, we always get the ancestor image for each key in the child branch no matter whether the image + /// is needed for reconstruction. This should be fixed in the future. + /// + /// Furthermore, we should do vectored get instead of a single get, or better, use k-merge for ancestor + /// images. + async fn get_ancestor_image( + tline: &Arc, + key: Key, + ctx: &RequestContext, + ) -> anyhow::Result> { + if tline.ancestor_timeline.is_none() { + return Ok(None); + }; + // This function is implemented as a get of the current timeline at ancestor LSN, therefore reusing + // as much existing code as possible. + let img = tline.get(key, tline.ancestor_lsn, ctx).await?; + Ok(Some((key, tline.ancestor_lsn, img))) + } let mut delta_values = Vec::new(); let delta_split_points = delta_split_points.into_iter().collect_vec(); @@ -1668,11 +1718,17 @@ impl Timeline { gc_cutoff, &retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, + get_ancestor_image(self, *last_key, ctx).await?, ) .await?; // Put the image into the image layer. Currently we have a single big layer for the compaction. retention - .pipe_to(*last_key, &mut delta_values, &mut image_layer_writer, ctx) + .pipe_to( + *last_key, + &mut delta_values, + image_layer_writer.as_mut(), + ctx, + ) .await?; delta_layers.extend( flush_deltas( @@ -1683,6 +1739,7 @@ impl Timeline { self, lowest_retain_lsn, ctx, + false, ) .await?, ); @@ -1701,11 +1758,17 @@ impl Timeline { gc_cutoff, &retain_lsns_below_horizon, COMPACTION_DELTA_THRESHOLD, + get_ancestor_image(self, last_key, ctx).await?, ) .await?; // Put the image into the image layer. Currently we have a single big layer for the compaction. retention - .pipe_to(last_key, &mut delta_values, &mut image_layer_writer, ctx) + .pipe_to( + last_key, + &mut delta_values, + image_layer_writer.as_mut(), + ctx, + ) .await?; delta_layers.extend( flush_deltas( @@ -1716,19 +1779,25 @@ impl Timeline { self, lowest_retain_lsn, ctx, + true, ) .await?, ); + assert!(delta_values.is_empty(), "unprocessed keys"); - let image_layer = image_layer_writer.finish(self, ctx).await?; + let image_layer = if let Some(writer) = image_layer_writer { + Some(writer.finish(self, ctx).await?) + } else { + None + }; info!( "produced {} delta layers and {} image layers", delta_layers.len(), - 1 + if image_layer.is_some() { 1 } else { 0 } ); let mut compact_to = Vec::new(); compact_to.extend(delta_layers); - compact_to.push(image_layer); + compact_to.extend(image_layer); // Step 3: Place back to the layer map. { let mut guard = self.layers.write().await; From 31122adee3fa0dac6bec5bfd1682f04adf42e490 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Wed, 31 Jul 2024 17:05:45 +0200 Subject: [PATCH 18/37] refactor(page_service): Timeline gate guard holding + cancellation + shutdown (#8339) Since the introduction of sharding, the protocol handling loop in `handle_pagerequests` cannot know anymore which concrete `Tenant`/`Timeline` object any of the incoming `PagestreamFeMessage` resolves to. In fact, one message might resolve to one `Tenant`/`Timeline` while the next one may resolve to another one. To avoid going to tenant manager, we added the `shard_timelines` which acted as an ever-growing cache that held timeline gate guards open for the lifetime of the connection. The consequence of holding the gate guards open was that we had to be sensitive to every cached `Timeline::cancel` on each interaction with the network connection, so that Timeline shutdown would not have to wait for network connection interaction. We can do better than that, meaning more efficiency & better abstraction. I proposed a sketch for it in * https://github.com/neondatabase/neon/pull/8286 and this PR implements an evolution of that sketch. The main idea is is that `mod page_service` shall be solely concerned with the following: 1. receiving requests by speaking the protocol / pagestream subprotocol 2. dispatching the request to a corresponding method on the correct shard/`Timeline` object 3. sending response by speaking the protocol / pagestream subprotocol. The cancellation sensitivity responsibilities are clear cut: * while in `page_service` code, sensitivity to page_service cancellation is sufficient * while in `Timeline` code, sensitivity to `Timeline::cancel` is sufficient To enforce these responsibilities, we introduce the notion of a `timeline::handle::Handle` to a `Timeline` object that is checked out from a `timeline::handle::Cache` for **each request**. The `Handle` derefs to `Timeline` and is supposed to be used for a single async method invocation on `Timeline`. See the lengthy doc comment in `mod handle` for details of the design. --- pageserver/src/bin/pageserver.rs | 43 +- pageserver/src/http/routes.rs | 5 + pageserver/src/lib.rs | 10 +- pageserver/src/page_service.rs | 766 +++++++++--------- pageserver/src/tenant.rs | 2 + pageserver/src/tenant/mgr.rs | 6 +- pageserver/src/tenant/timeline.rs | 20 + pageserver/src/tenant/timeline/handle.rs | 967 +++++++++++++++++++++++ 8 files changed, 1387 insertions(+), 432 deletions(-) create mode 100644 pageserver/src/tenant/timeline/handle.rs diff --git a/pageserver/src/bin/pageserver.rs b/pageserver/src/bin/pageserver.rs index 2d00f311fb..5ebd6511ac 100644 --- a/pageserver/src/bin/pageserver.rs +++ b/pageserver/src/bin/pageserver.rs @@ -17,11 +17,9 @@ use pageserver::config::PageserverIdentity; use pageserver::control_plane_client::ControlPlaneClient; use pageserver::disk_usage_eviction_task::{self, launch_disk_usage_global_eviction_task}; use pageserver::metrics::{STARTUP_DURATION, STARTUP_IS_LOADING}; -use pageserver::task_mgr::WALRECEIVER_RUNTIME; +use pageserver::task_mgr::{COMPUTE_REQUEST_RUNTIME, WALRECEIVER_RUNTIME}; use pageserver::tenant::{secondary, TenantSharedResources}; -use pageserver::{ - CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener, LibpqEndpointListener, -}; +use pageserver::{CancellableTask, ConsumptionMetricsTasks, HttpEndpointListener}; use remote_storage::GenericRemoteStorage; use tokio::signal::unix::SignalKind; use tokio::time::Instant; @@ -31,11 +29,9 @@ use tracing::*; use metrics::set_build_info_metric; use pageserver::{ config::PageServerConf, - context::{DownloadBehavior, RequestContext}, deletion_queue::DeletionQueue, http, page_cache, page_service, task_mgr, - task_mgr::TaskKind, - task_mgr::{BACKGROUND_RUNTIME, COMPUTE_REQUEST_RUNTIME, MGMT_REQUEST_RUNTIME}, + task_mgr::{BACKGROUND_RUNTIME, MGMT_REQUEST_RUNTIME}, tenant::mgr, virtual_file, }; @@ -594,30 +590,13 @@ fn start_pageserver( // Spawn a task to listen for libpq connections. It will spawn further tasks // for each connection. We created the listener earlier already. - let libpq_listener = { - let cancel = CancellationToken::new(); - let libpq_ctx = RequestContext::todo_child( - TaskKind::LibpqEndpointListener, - // listener task shouldn't need to download anything. (We will - // create a separate sub-contexts for each connection, with their - // own download behavior. This context is used only to listen and - // accept connections.) - DownloadBehavior::Error, - ); - - let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error( - "libpq listener", - page_service::libpq_listener_main( - tenant_manager.clone(), - pg_auth, - pageserver_listener, - conf.pg_auth_type, - libpq_ctx, - cancel.clone(), - ), - )); - LibpqEndpointListener(CancellableTask { task, cancel }) - }; + let page_service = page_service::spawn(conf, tenant_manager.clone(), pg_auth, { + let _entered = COMPUTE_REQUEST_RUNTIME.enter(); // TcpListener::from_std requires it + pageserver_listener + .set_nonblocking(true) + .context("set listener to nonblocking")?; + tokio::net::TcpListener::from_std(pageserver_listener).context("create tokio listener")? + }); let mut shutdown_pageserver = Some(shutdown_pageserver.drop_guard()); @@ -645,7 +624,7 @@ fn start_pageserver( shutdown_pageserver.take(); pageserver::shutdown_pageserver( http_endpoint_listener, - libpq_listener, + page_service, consumption_metrics_tasks, disk_usage_eviction_task, &tenant_manager, diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 9222123ad3..117f2c5869 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -296,6 +296,11 @@ impl From for ApiError { GetActiveTenantError::WaitForActiveTimeout { .. } => { ApiError::ResourceUnavailable(format!("{}", e).into()) } + GetActiveTenantError::SwitchedTenant => { + // in our HTTP handlers, this error doesn't happen + // TODO: separate error types + ApiError::ResourceUnavailable("switched tenant".into()) + } } } } diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index d944019641..f729cad3c3 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -30,7 +30,6 @@ pub mod walingest; pub mod walrecord; pub mod walredo; -use crate::task_mgr::TaskKind; use camino::Utf8Path; use deletion_queue::DeletionQueue; use tenant::{ @@ -63,7 +62,6 @@ pub struct CancellableTask { pub cancel: CancellationToken, } pub struct HttpEndpointListener(pub CancellableTask); -pub struct LibpqEndpointListener(pub CancellableTask); pub struct ConsumptionMetricsTasks(pub CancellableTask); pub struct DiskUsageEvictionTask(pub CancellableTask); impl CancellableTask { @@ -77,7 +75,7 @@ impl CancellableTask { #[allow(clippy::too_many_arguments)] pub async fn shutdown_pageserver( http_listener: HttpEndpointListener, - libpq_listener: LibpqEndpointListener, + page_service: page_service::Listener, consumption_metrics_worker: ConsumptionMetricsTasks, disk_usage_eviction_task: Option, tenant_manager: &TenantManager, @@ -89,8 +87,8 @@ pub async fn shutdown_pageserver( use std::time::Duration; // Shut down the libpq endpoint task. This prevents new connections from // being accepted. - timed( - libpq_listener.0.shutdown(), + let remaining_connections = timed( + page_service.stop_accepting(), "shutdown LibpqEndpointListener", Duration::from_secs(1), ) @@ -108,7 +106,7 @@ pub async fn shutdown_pageserver( // Shut down any page service tasks: any in-progress work for particular timelines or tenants // should already have been canclled via mgr::shutdown_all_tenants timed( - task_mgr::shutdown_tasks(Some(TaskKind::PageRequestHandler), None, None), + remaining_connections.shutdown(), "shutdown PageRequestHandlers", Duration::from_secs(1), ) diff --git a/pageserver/src/page_service.rs b/pageserver/src/page_service.rs index 6353f713e0..5344b83e0d 100644 --- a/pageserver/src/page_service.rs +++ b/pageserver/src/page_service.rs @@ -4,9 +4,8 @@ use anyhow::Context; use async_compression::tokio::write::GzipEncoder; use bytes::Buf; -use futures::stream::FuturesUnordered; -use futures::StreamExt; -use pageserver_api::key::Key; +use futures::FutureExt; +use once_cell::sync::OnceCell; use pageserver_api::models::TenantState; use pageserver_api::models::{ PagestreamBeMessage, PagestreamDbSizeRequest, PagestreamDbSizeResponse, @@ -15,28 +14,23 @@ use pageserver_api::models::{ PagestreamGetSlruSegmentRequest, PagestreamGetSlruSegmentResponse, PagestreamNblocksRequest, PagestreamNblocksResponse, PagestreamProtocolVersion, }; -use pageserver_api::shard::ShardIndex; -use pageserver_api::shard::ShardNumber; use pageserver_api::shard::TenantShardId; use postgres_backend::{is_expected_io_error, AuthType, PostgresBackend, QueryError}; use pq_proto::framed::ConnectionError; use pq_proto::FeStartupPacket; use pq_proto::{BeMessage, FeMessage, RowDescriptor}; use std::borrow::Cow; -use std::collections::HashMap; use std::io; -use std::net::TcpListener; use std::str; use std::str::FromStr; use std::sync::Arc; -use std::time::Duration; -use std::time::Instant; use std::time::SystemTime; +use std::time::{Duration, Instant}; use tokio::io::AsyncWriteExt; use tokio::io::{AsyncRead, AsyncWrite}; +use tokio::task::JoinHandle; use tokio_util::sync::CancellationToken; use tracing::*; -use utils::sync::gate::GateGuard; use utils::{ auth::{Claims, Scope, SwappableJwtAuth}, id::{TenantId, TimelineId}, @@ -47,61 +41,130 @@ use utils::{ use crate::auth::check_permission; use crate::basebackup; use crate::basebackup::BasebackupError; +use crate::config::PageServerConf; use crate::context::{DownloadBehavior, RequestContext}; use crate::metrics; use crate::metrics::{ComputeCommandKind, COMPUTE_COMMANDS_COUNTERS, LIVE_CONNECTIONS}; use crate::pgdatadir_mapping::Version; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id; use crate::span::debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id; -use crate::task_mgr; use crate::task_mgr::TaskKind; -use crate::tenant::mgr::GetActiveTenantError; -use crate::tenant::mgr::GetTenantError; -use crate::tenant::mgr::ShardResolveResult; +use crate::task_mgr::{self, COMPUTE_REQUEST_RUNTIME}; use crate::tenant::mgr::ShardSelector; use crate::tenant::mgr::TenantManager; -use crate::tenant::timeline::WaitLsnError; +use crate::tenant::mgr::{GetActiveTenantError, GetTenantError, ShardResolveResult}; +use crate::tenant::timeline::{self, WaitLsnError}; use crate::tenant::GetTimelineError; use crate::tenant::PageReconstructError; -use crate::tenant::Tenant; use crate::tenant::Timeline; use pageserver_api::key::rel_block_to_key; use pageserver_api::reltag::SlruKind; use postgres_ffi::pg_constants::DEFAULTTABLESPACE_OID; use postgres_ffi::BLCKSZ; -// How long we may wait for a [`TenantSlot::InProgress`]` and/or a [`Tenant`] which -// is not yet in state [`TenantState::Active`]. +/// How long we may wait for a [`crate::tenant::mgr::TenantSlot::InProgress`]` and/or a [`crate::tenant::Tenant`] which +/// is not yet in state [`TenantState::Active`]. +/// +/// NB: this is a different value than [`crate::http::routes::ACTIVE_TENANT_TIMEOUT`]. const ACTIVE_TENANT_TIMEOUT: Duration = Duration::from_millis(30000); /////////////////////////////////////////////////////////////////////////////// +pub struct Listener { + cancel: CancellationToken, + /// Cancel the listener task through `listen_cancel` to shut down the listener + /// and get a handle on the existing connections. + task: JoinHandle, +} + +pub struct Connections { + cancel: CancellationToken, + tasks: tokio::task::JoinSet, +} + +pub fn spawn( + conf: &'static PageServerConf, + tenant_manager: Arc, + pg_auth: Option>, + tcp_listener: tokio::net::TcpListener, +) -> Listener { + let cancel = CancellationToken::new(); + let libpq_ctx = RequestContext::todo_child( + TaskKind::LibpqEndpointListener, + // listener task shouldn't need to download anything. (We will + // create a separate sub-contexts for each connection, with their + // own download behavior. This context is used only to listen and + // accept connections.) + DownloadBehavior::Error, + ); + let task = COMPUTE_REQUEST_RUNTIME.spawn(task_mgr::exit_on_panic_or_error( + "libpq listener", + libpq_listener_main( + tenant_manager, + pg_auth, + tcp_listener, + conf.pg_auth_type, + libpq_ctx, + cancel.clone(), + ) + .map(anyhow::Ok), + )); + + Listener { cancel, task } +} + +impl Listener { + pub async fn stop_accepting(self) -> Connections { + self.cancel.cancel(); + self.task + .await + .expect("unreachable: we wrap the listener task in task_mgr::exit_on_panic_or_error") + } +} +impl Connections { + pub async fn shutdown(self) { + let Self { cancel, mut tasks } = self; + cancel.cancel(); + while let Some(res) = tasks.join_next().await { + // the logging done here mimics what was formerly done by task_mgr + match res { + Ok(Ok(())) => {} + Ok(Err(e)) => error!("error in page_service connection task: {:?}", e), + Err(e) => error!("page_service connection task panicked: {:?}", e), + } + } + } +} + /// /// Main loop of the page service. /// /// Listens for connections, and launches a new handler task for each. /// +/// Returns Ok(()) upon cancellation via `cancel`, returning the set of +/// open connections. +/// pub async fn libpq_listener_main( tenant_manager: Arc, auth: Option>, - listener: TcpListener, + listener: tokio::net::TcpListener, auth_type: AuthType, listener_ctx: RequestContext, - cancel: CancellationToken, -) -> anyhow::Result<()> { - listener.set_nonblocking(true)?; - let tokio_listener = tokio::net::TcpListener::from_std(listener)?; + listener_cancel: CancellationToken, +) -> Connections { + let connections_cancel = CancellationToken::new(); + let mut connection_handler_tasks = tokio::task::JoinSet::default(); // Wait for a new connection to arrive, or for server shutdown. while let Some(res) = tokio::select! { biased; - _ = cancel.cancelled() => { + _ = listener_cancel.cancelled() => { // We were requested to shut down. None } - res = tokio_listener.accept() => { + res = listener.accept() => { Some(res) } } { @@ -110,28 +173,16 @@ pub async fn libpq_listener_main( // Connection established. Spawn a new task to handle it. debug!("accepted connection from {}", peer_addr); let local_auth = auth.clone(); - let connection_ctx = listener_ctx .detached_child(TaskKind::PageRequestHandler, DownloadBehavior::Download); - - // PageRequestHandler tasks are not associated with any particular - // timeline in the task manager. In practice most connections will - // only deal with a particular timeline, but we don't know which one - // yet. - task_mgr::spawn( - &tokio::runtime::Handle::current(), - TaskKind::PageRequestHandler, - None, - None, - "serving compute connection task", - page_service_conn_main( - tenant_manager.clone(), - local_auth, - socket, - auth_type, - connection_ctx, - ), - ); + connection_handler_tasks.spawn(page_service_conn_main( + tenant_manager.clone(), + local_auth, + socket, + auth_type, + connection_ctx, + connections_cancel.child_token(), + )); } Err(err) => { // accept() failed. Log the error, and loop back to retry on next connection. @@ -140,11 +191,16 @@ pub async fn libpq_listener_main( } } - debug!("page_service loop terminated"); + debug!("page_service listener loop terminated"); - Ok(()) + Connections { + cancel: connections_cancel, + tasks: connection_handler_tasks, + } } +type ConnectionHandlerResult = anyhow::Result<()>; + #[instrument(skip_all, fields(peer_addr))] async fn page_service_conn_main( tenant_manager: Arc, @@ -152,7 +208,8 @@ async fn page_service_conn_main( socket: tokio::net::TcpStream, auth_type: AuthType, connection_ctx: RequestContext, -) -> anyhow::Result<()> { + cancel: CancellationToken, +) -> ConnectionHandlerResult { let _guard = LIVE_CONNECTIONS .with_label_values(&["page_service"]) .guard(); @@ -200,13 +257,11 @@ async fn page_service_conn_main( // and create a child per-query context when it invokes process_query. // But it's in a shared crate, so, we store connection_ctx inside PageServerHandler // and create the per-query context in process_query ourselves. - let mut conn_handler = PageServerHandler::new(tenant_manager, auth, connection_ctx); + let mut conn_handler = + PageServerHandler::new(tenant_manager, auth, connection_ctx, cancel.clone()); let pgbackend = PostgresBackend::new_from_io(socket, peer_addr, auth_type, None)?; - match pgbackend - .run(&mut conn_handler, &task_mgr::shutdown_token()) - .await - { + match pgbackend.run(&mut conn_handler, &cancel).await { Ok(()) => { // we've been requested to shut down Ok(()) @@ -223,32 +278,154 @@ async fn page_service_conn_main( } } -/// While a handler holds a reference to a Timeline, it also holds a the -/// timeline's Gate open. -struct HandlerTimeline { - timeline: Arc, - _guard: GateGuard, -} - struct PageServerHandler { auth: Option>, claims: Option, - tenant_manager: Arc, - /// The context created for the lifetime of the connection /// services by this PageServerHandler. /// For each query received over the connection, /// `process_query` creates a child context from this one. connection_ctx: RequestContext, - /// See [`Self::cache_timeline`] for usage. - /// + cancel: CancellationToken, + + timeline_handles: TimelineHandles, +} + +struct TimelineHandles { + wrapper: TenantManagerWrapper, /// Note on size: the typical size of this map is 1. The largest size we expect /// to see is the number of shards divided by the number of pageservers (typically < 2), /// or the ratio used when splitting shards (i.e. how many children created from one) /// parent shard, where a "large" number might be ~8. - shard_timelines: HashMap, + handles: timeline::handle::Cache, +} + +impl TimelineHandles { + fn new(tenant_manager: Arc) -> Self { + Self { + wrapper: TenantManagerWrapper { + tenant_manager, + tenant_id: OnceCell::new(), + }, + handles: Default::default(), + } + } + async fn get( + &mut self, + tenant_id: TenantId, + timeline_id: TimelineId, + shard_selector: ShardSelector, + ) -> Result, GetActiveTimelineError> { + if *self.wrapper.tenant_id.get_or_init(|| tenant_id) != tenant_id { + return Err(GetActiveTimelineError::Tenant( + GetActiveTenantError::SwitchedTenant, + )); + } + self.handles + .get(timeline_id, shard_selector, &self.wrapper) + .await + .map_err(|e| match e { + timeline::handle::GetError::TenantManager(e) => e, + timeline::handle::GetError::TimelineGateClosed => { + trace!("timeline gate closed"); + GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) + } + timeline::handle::GetError::PerTimelineStateShutDown => { + trace!("per-timeline state shut down"); + GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) + } + }) + } +} + +pub(crate) struct TenantManagerWrapper { + tenant_manager: Arc, + // We do not support switching tenant_id on a connection at this point. + // We can can add support for this later if needed without changing + // the protocol. + tenant_id: once_cell::sync::OnceCell, +} + +#[derive(Debug)] +pub(crate) struct TenantManagerTypes; + +impl timeline::handle::Types for TenantManagerTypes { + type TenantManagerError = GetActiveTimelineError; + type TenantManager = TenantManagerWrapper; + type Timeline = Arc; +} + +impl timeline::handle::ArcTimeline for Arc { + fn gate(&self) -> &utils::sync::gate::Gate { + &self.gate + } + + fn shard_timeline_id(&self) -> timeline::handle::ShardTimelineId { + Timeline::shard_timeline_id(self) + } + + fn per_timeline_state(&self) -> &timeline::handle::PerTimelineState { + &self.handles + } + + fn get_shard_identity(&self) -> &pageserver_api::shard::ShardIdentity { + Timeline::get_shard_identity(self) + } +} + +impl timeline::handle::TenantManager for TenantManagerWrapper { + async fn resolve( + &self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + ) -> Result, GetActiveTimelineError> { + let tenant_id = self.tenant_id.get().expect("we set this in get()"); + let timeout = ACTIVE_TENANT_TIMEOUT; + let wait_start = Instant::now(); + let deadline = wait_start + timeout; + let tenant_shard = loop { + let resolved = self + .tenant_manager + .resolve_attached_shard(tenant_id, shard_selector); + match resolved { + ShardResolveResult::Found(tenant_shard) => break tenant_shard, + ShardResolveResult::NotFound => { + return Err(GetActiveTimelineError::Tenant( + GetActiveTenantError::NotFound(GetTenantError::NotFound(*tenant_id)), + )); + } + ShardResolveResult::InProgress(barrier) => { + // We can't authoritatively answer right now: wait for InProgress state + // to end, then try again + tokio::select! { + _ = barrier.wait() => { + // The barrier completed: proceed around the loop to try looking up again + }, + _ = tokio::time::sleep(deadline.duration_since(Instant::now())) => { + return Err(GetActiveTimelineError::Tenant(GetActiveTenantError::WaitForActiveTimeout { + latest_state: None, + wait_time: timeout, + })); + } + } + } + }; + }; + + tracing::debug!("Waiting for tenant to enter active state..."); + tenant_shard + .wait_to_become_active(deadline.duration_since(Instant::now())) + .await + .map_err(GetActiveTimelineError::Tenant)?; + + let timeline = tenant_shard + .get_timeline(timeline_id, true) + .map_err(GetActiveTimelineError::Timeline)?; + set_tracing_field_shard_id(&timeline); + Ok(timeline) + } } #[derive(thiserror::Error, Debug)] @@ -292,7 +469,11 @@ impl From for PageStreamError { impl From for PageStreamError { fn from(value: GetActiveTimelineError) -> Self { match value { - GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled) => Self::Shutdown, + GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled) + | GetActiveTimelineError::Tenant(GetActiveTenantError::WillNotBecomeActive( + TenantState::Stopping { .. }, + )) + | GetActiveTimelineError::Timeline(GetTimelineError::ShuttingDown) => Self::Shutdown, GetActiveTimelineError::Tenant(e) => Self::NotFound(format!("{e}").into()), GetActiveTimelineError::Timeline(e) => Self::NotFound(format!("{e}").into()), } @@ -324,64 +505,17 @@ impl PageServerHandler { tenant_manager: Arc, auth: Option>, connection_ctx: RequestContext, + cancel: CancellationToken, ) -> Self { PageServerHandler { - tenant_manager, auth, claims: None, connection_ctx, - shard_timelines: HashMap::new(), + timeline_handles: TimelineHandles::new(tenant_manager), + cancel, } } - /// Future that completes when we need to shut down the connection. - /// - /// We currently need to shut down when any of the following happens: - /// 1. any of the timelines we hold GateGuards for in `shard_timelines` is cancelled - /// 2. task_mgr requests shutdown of the connection - /// - /// NB on (1): the connection's lifecycle is not actually tied to any of the - /// `shard_timelines`s' lifecycles. But it's _necessary_ in the current - /// implementation to be responsive to timeline cancellation because - /// the connection holds their `GateGuards` open (sored in `shard_timelines`). - /// We currently do the easy thing and terminate the connection if any of the - /// shard_timelines gets cancelled. But really, we cuold spend more effort - /// and simply remove the cancelled timeline from the `shard_timelines`, thereby - /// dropping the guard. - /// - /// NB: keep in sync with [`Self::is_connection_cancelled`] - async fn await_connection_cancelled(&self) { - // A short wait before we expend the cycles to walk our timeline map. This avoids incurring - // that cost every time we check for cancellation. - tokio::time::sleep(Duration::from_millis(10)).await; - - // This function is never called concurrently with code that adds timelines to shard_timelines, - // which is enforced by the borrow checker (the future returned by this function carries the - // immutable &self). So it's fine to evaluate shard_timelines after the sleep, we don't risk - // missing any inserts to the map. - - let mut cancellation_sources = Vec::with_capacity(1 + self.shard_timelines.len()); - use futures::future::Either; - cancellation_sources.push(Either::Left(task_mgr::shutdown_watcher())); - cancellation_sources.extend( - self.shard_timelines - .values() - .map(|ht| Either::Right(ht.timeline.cancel.cancelled())), - ); - FuturesUnordered::from_iter(cancellation_sources) - .next() - .await; - } - - /// Checking variant of [`Self::await_connection_cancelled`]. - fn is_connection_cancelled(&self) -> bool { - task_mgr::is_shutdown_requested() - || self - .shard_timelines - .values() - .any(|ht| ht.timeline.cancel.is_cancelled() || ht.timeline.is_stopping()) - } - /// This function always respects cancellation of any timeline in `[Self::shard_timelines]`. Pass in /// a cancellation token at the next scope up (such as a tenant cancellation token) to ensure we respect /// cancellation if there aren't any timelines in the cache. @@ -400,15 +534,21 @@ impl PageServerHandler { flush_r = pgb.flush() => { Ok(flush_r?) }, - _ = self.await_connection_cancelled() => { - Err(QueryError::Shutdown) - } _ = cancel.cancelled() => { Err(QueryError::Shutdown) } ) } + /// Pagestream sub-protocol handler. + /// + /// It is a simple request-response protocol inside a COPYBOTH session. + /// + /// # Coding Discipline + /// + /// Coding discipline within this function: all interaction with the `pgb` connection + /// needs to be sensitive to connection shutdown, currently signalled via [`Self::cancel`]. + /// This is so that we can shutdown page_service quickly. #[instrument(skip_all)] async fn handle_pagerequests( &mut self, @@ -423,27 +563,27 @@ impl PageServerHandler { { debug_assert_current_span_has_tenant_and_timeline_id_no_shard_id(); - let tenant = self - .get_active_tenant_with_timeout(tenant_id, ShardSelector::First, ACTIVE_TENANT_TIMEOUT) - .await?; - // switch client to COPYBOTH pgb.write_message_noflush(&BeMessage::CopyBothResponse)?; - self.flush_cancellable(pgb, &tenant.cancel).await?; + tokio::select! { + biased; + _ = self.cancel.cancelled() => { + return Err(QueryError::Shutdown) + } + res = pgb.flush() => { + res?; + } + } loop { + // read request bytes (it's exactly 1 PagestreamFeMessage per CopyData) let msg = tokio::select! { biased; - - _ = self.await_connection_cancelled() => { - // We were requested to shut down. - info!("shutdown request received in page handler"); + _ = self.cancel.cancelled() => { return Err(QueryError::Shutdown) } - msg = pgb.read_message() => { msg } }; - let copy_data_bytes = match msg? { Some(FeMessage::CopyData(bytes)) => bytes, Some(FeMessage::Terminate) => break, @@ -458,13 +598,12 @@ impl PageServerHandler { trace!("query: {copy_data_bytes:?}"); fail::fail_point!("ps::handle-pagerequest-message"); + // parse request let neon_fe_msg = PagestreamFeMessage::parse(&mut copy_data_bytes.reader(), protocol_version)?; - // TODO: We could create a new per-request context here, with unique ID. - // Currently we use the same per-timeline context for all requests - - let (response, span) = match neon_fe_msg { + // invoke handler function + let (handler_result, span) = match neon_fe_msg { PagestreamFeMessage::Exists(req) => { fail::fail_point!("ps::handle-pagerequest-message::exists"); let span = tracing::info_span!("handle_get_rel_exists_request", rel = %req.rel, req_lsn = %req.request_lsn); @@ -518,31 +657,26 @@ impl PageServerHandler { } }; - match response { - Err(PageStreamError::Shutdown) => { - // If we fail to fulfil a request during shutdown, which may be _because_ of - // shutdown, then do not send the error to the client. Instead just drop the - // connection. - span.in_scope(|| info!("dropping connection due to shutdown")); - return Err(QueryError::Shutdown); - } - Err(PageStreamError::Reconnect(reason)) => { - span.in_scope(|| info!("handler requested reconnect: {reason}")); - return Err(QueryError::Reconnect); - } - Err(e) if self.is_connection_cancelled() => { - // This branch accomodates code within request handlers that returns an anyhow::Error instead of a clean - // shutdown error, this may be buried inside a PageReconstructError::Other for example. - // - // Requests may fail as soon as we are Stopping, even if the Timeline's cancellation token wasn't fired yet, - // because wait_lsn etc will drop out - // is_stopping(): [`Timeline::flush_and_shutdown`] has entered - // is_canceled(): [`Timeline::shutdown`]` has entered - span.in_scope(|| info!("dropped error response during shutdown: {e:#}")); - return Err(QueryError::Shutdown); - } - r => { - let response_msg = r.unwrap_or_else(|e| { + // Map handler result to protocol behavior. + // Some handler errors cause exit from pagestream protocol. + // Other handler errors are sent back as an error message and we stay in pagestream protocol. + let response_msg = match handler_result { + Err(e) => match &e { + PageStreamError::Shutdown => { + // If we fail to fulfil a request during shutdown, which may be _because_ of + // shutdown, then do not send the error to the client. Instead just drop the + // connection. + span.in_scope(|| info!("dropping connection due to shutdown")); + return Err(QueryError::Shutdown); + } + PageStreamError::Reconnect(reason) => { + span.in_scope(|| info!("handler requested reconnect: {reason}")); + return Err(QueryError::Reconnect); + } + PageStreamError::Read(_) + | PageStreamError::LsnTimeout(_) + | PageStreamError::NotFound(_) + | PageStreamError::BadRequest(_) => { // print the all details to the log with {:#}, but for the client the // error message is enough. Do not log if shutting down, as the anyhow::Error // here includes cancellation which is not an error. @@ -553,10 +687,22 @@ impl PageServerHandler { PagestreamBeMessage::Error(PagestreamErrorResponse { message: e.to_string(), }) - }); + } + }, + Ok(response_msg) => response_msg, + }; - pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; - self.flush_cancellable(pgb, &tenant.cancel).await?; + // marshal & transmit response message + pgb.write_message_noflush(&BeMessage::CopyData(&response_msg.serialize()))?; + tokio::select! { + biased; + _ = self.cancel.cancelled() => { + // We were requested to shut down. + info!("shutdown request received in page handler"); + return Err(QueryError::Shutdown) + } + res = pgb.flush() => { + res?; } } } @@ -644,7 +790,7 @@ impl PageServerHandler { #[instrument(skip_all, fields(shard_id, %lsn))] async fn handle_make_lsn_lease( - &self, + &mut self, pgb: &mut PostgresBackend, tenant_shard_id: TenantShardId, timeline_id: TimelineId, @@ -654,10 +800,16 @@ impl PageServerHandler { where IO: AsyncRead + AsyncWrite + Send + Sync + Unpin, { - let shard_selector = ShardSelector::Known(tenant_shard_id.to_index()); let timeline = self - .get_active_tenant_timeline(tenant_shard_id.tenant_id, timeline_id, shard_selector) + .timeline_handles + .get( + tenant_shard_id.tenant_id, + timeline_id, + ShardSelector::Known(tenant_shard_id.to_index()), + ) .await?; + set_tracing_field_shard_id(&timeline); + let lease = timeline.make_lsn_lease(lsn, timeline.get_lsn_lease_length(), ctx)?; let valid_until = lease .valid_until @@ -683,14 +835,17 @@ impl PageServerHandler { req: &PagestreamExistsRequest, ctx: &RequestContext, ) -> Result { - let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?; + let timeline = self + .timeline_handles + .get(tenant_id, timeline_id, ShardSelector::Zero) + .await?; let _timer = timeline .query_metrics .start_timer(metrics::SmgrQueryType::GetRelExists, ctx); let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( - timeline, + &timeline, req.request_lsn, req.not_modified_since, &latest_gc_cutoff_lsn, @@ -715,7 +870,10 @@ impl PageServerHandler { req: &PagestreamNblocksRequest, ctx: &RequestContext, ) -> Result { - let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?; + let timeline = self + .timeline_handles + .get(tenant_id, timeline_id, ShardSelector::Zero) + .await?; let _timer = timeline .query_metrics @@ -723,7 +881,7 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( - timeline, + &timeline, req.request_lsn, req.not_modified_since, &latest_gc_cutoff_lsn, @@ -748,7 +906,10 @@ impl PageServerHandler { req: &PagestreamDbSizeRequest, ctx: &RequestContext, ) -> Result { - let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?; + let timeline = self + .timeline_handles + .get(tenant_id, timeline_id, ShardSelector::Zero) + .await?; let _timer = timeline .query_metrics @@ -756,7 +917,7 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( - timeline, + &timeline, req.request_lsn, req.not_modified_since, &latest_gc_cutoff_lsn, @@ -774,122 +935,6 @@ impl PageServerHandler { })) } - /// For most getpage requests, we will already have a Timeline to serve the request: this function - /// looks up such a Timeline synchronously and without touching any global state. - fn get_cached_timeline_for_page( - &mut self, - req: &PagestreamGetPageRequest, - ) -> Result<&Arc, Key> { - let key = if let Some((first_idx, first_timeline)) = self.shard_timelines.iter().next() { - // Fastest path: single sharded case - if first_idx.shard_count.count() == 1 { - return Ok(&first_timeline.timeline); - } - - let key = rel_block_to_key(req.rel, req.blkno); - let shard_num = first_timeline - .timeline - .get_shard_identity() - .get_shard_number(&key); - - // Fast path: matched the first timeline in our local handler map. This case is common if - // only one shard per tenant is attached to this pageserver. - if first_timeline.timeline.get_shard_identity().number == shard_num { - return Ok(&first_timeline.timeline); - } - - let shard_index = ShardIndex { - shard_number: shard_num, - shard_count: first_timeline.timeline.get_shard_identity().count, - }; - - // Fast-ish path: timeline is in the connection handler's local cache - if let Some(found) = self.shard_timelines.get(&shard_index) { - return Ok(&found.timeline); - } - - key - } else { - rel_block_to_key(req.rel, req.blkno) - }; - - Err(key) - } - - /// Having looked up the [`Timeline`] instance for a particular shard, cache it to enable - /// use in future requests without having to traverse [`crate::tenant::mgr::TenantManager`] - /// again. - /// - /// Note that all the Timelines in this cache are for the same timeline_id: they're differ - /// in which shard they belong to. When we serve a getpage@lsn request, we choose a shard - /// based on key. - /// - /// The typical size of this cache is 1, as we generally create shards to distribute work - /// across pageservers, so don't tend to have multiple shards for the same tenant on the - /// same pageserver. - fn cache_timeline( - &mut self, - timeline: Arc, - ) -> Result<&Arc, GetActiveTimelineError> { - let gate_guard = timeline - .gate - .enter() - .map_err(|_| GetActiveTimelineError::Tenant(GetActiveTenantError::Cancelled))?; - - let shard_index = timeline.tenant_shard_id.to_index(); - let entry = self - .shard_timelines - .entry(shard_index) - .or_insert(HandlerTimeline { - timeline, - _guard: gate_guard, - }); - - Ok(&entry.timeline) - } - - /// If [`Self::get_cached_timeline_for_page`] missed, then this function is used to populate the cache with - /// a Timeline to serve requests for this key, if such a Timeline is present on this pageserver. If no such - /// Timeline is found, then we will return an error (this indicates that the client is talking to the wrong node). - async fn load_timeline_for_page( - &mut self, - tenant_id: TenantId, - timeline_id: TimelineId, - key: Key, - ) -> anyhow::Result<&Arc, GetActiveTimelineError> { - // Slow path: we must call out to the TenantManager to find the timeline for this Key - let timeline = self - .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Page(key)) - .await?; - - self.cache_timeline(timeline) - } - - async fn get_timeline_shard_zero( - &mut self, - tenant_id: TenantId, - timeline_id: TimelineId, - ) -> anyhow::Result<&Arc, GetActiveTimelineError> { - // This is a borrow-checker workaround: we can't return from inside of the `if let Some` because - // that would be an immutable-borrow-self return, whereas later in the function we will use a mutable - // ref to salf. So instead, we first build a bool, and then return while not borrowing self. - let have_cached = if let Some((idx, _tl)) = self.shard_timelines.iter().next() { - idx.shard_number == ShardNumber(0) - } else { - false - }; - - if have_cached { - let entry = self.shard_timelines.iter().next().unwrap(); - Ok(&entry.1.timeline) - } else { - let timeline = self - .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero) - .await?; - Ok(self.cache_timeline(timeline)?) - } - } - #[instrument(skip_all, fields(shard_id))] async fn handle_get_page_at_lsn_request( &mut self, @@ -898,33 +943,30 @@ impl PageServerHandler { req: &PagestreamGetPageRequest, ctx: &RequestContext, ) -> Result { - let timeline = match self.get_cached_timeline_for_page(req) { - Ok(tl) => { - set_tracing_field_shard_id(tl); - tl - } - Err(key) => { - match self - .load_timeline_for_page(tenant_id, timeline_id, key) - .await - { - Ok(t) => t, - Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { - // We already know this tenant exists in general, because we resolved it at - // start of connection. Getting a NotFound here indicates that the shard containing - // the requested page is not present on this node: the client's knowledge of shard->pageserver - // mapping is out of date. - // - // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via - // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration - // and talk to a different pageserver. - return Err(PageStreamError::Reconnect( - "getpage@lsn request routed to wrong shard".into(), - )); - } - Err(e) => return Err(e.into()), - } + let timeline = match self + .timeline_handles + .get( + tenant_id, + timeline_id, + ShardSelector::Page(rel_block_to_key(req.rel, req.blkno)), + ) + .await + { + Ok(tl) => tl, + Err(GetActiveTimelineError::Tenant(GetActiveTenantError::NotFound(_))) => { + // We already know this tenant exists in general, because we resolved it at + // start of connection. Getting a NotFound here indicates that the shard containing + // the requested page is not present on this node: the client's knowledge of shard->pageserver + // mapping is out of date. + // + // Closing the connection by returning ``::Reconnect` has the side effect of rate-limiting above message, via + // client's reconnect backoff, as well as hopefully prompting the client to load its updated configuration + // and talk to a different pageserver. + return Err(PageStreamError::Reconnect( + "getpage@lsn request routed to wrong shard".into(), + )); } + Err(e) => return Err(e.into()), }; let _timer = timeline @@ -933,7 +975,7 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( - timeline, + &timeline, req.request_lsn, req.not_modified_since, &latest_gc_cutoff_lsn, @@ -958,7 +1000,10 @@ impl PageServerHandler { req: &PagestreamGetSlruSegmentRequest, ctx: &RequestContext, ) -> Result { - let timeline = self.get_timeline_shard_zero(tenant_id, timeline_id).await?; + let timeline = self + .timeline_handles + .get(tenant_id, timeline_id, ShardSelector::Zero) + .await?; let _timer = timeline .query_metrics @@ -966,7 +1011,7 @@ impl PageServerHandler { let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); let lsn = Self::wait_or_get_last_lsn( - timeline, + &timeline, req.request_lsn, req.not_modified_since, &latest_gc_cutoff_lsn, @@ -987,6 +1032,15 @@ impl PageServerHandler { /// Full basebackups should only be used for debugging purposes. /// Originally, it was introduced to enable breaking storage format changes, /// but that is not applicable anymore. + /// + /// # Coding Discipline + /// + /// Coding discipline within this function: all interaction with the `pgb` connection + /// needs to be sensitive to connection shutdown, currently signalled via [`Self::cancel`]. + /// This is so that we can shutdown page_service quickly. + /// + /// TODO: wrap the pgb that we pass to the basebackup handler so that it's sensitive + /// to connection cancellation. #[allow(clippy::too_many_arguments)] #[instrument(skip_all, fields(shard_id, ?lsn, ?prev_lsn, %full_backup))] async fn handle_basebackup_request( @@ -1012,10 +1066,11 @@ impl PageServerHandler { let started = std::time::Instant::now(); - // check that the timeline exists let timeline = self - .get_active_tenant_timeline(tenant_id, timeline_id, ShardSelector::Zero) + .timeline_handles + .get(tenant_id, timeline_id, ShardSelector::Zero) .await?; + let latest_gc_cutoff_lsn = timeline.get_latest_gc_cutoff_lsn(); if let Some(lsn) = lsn { // Backup was requested at a particular LSN. Wait for it to arrive. @@ -1037,7 +1092,7 @@ impl PageServerHandler { // switch client to COPYOUT pgb.write_message_noflush(&BeMessage::CopyOutResponse) .map_err(QueryError::Disconnected)?; - self.flush_cancellable(pgb, &timeline.cancel).await?; + self.flush_cancellable(pgb, &self.cancel).await?; // Send a tarball of the latest layer on the timeline. Compress if not // fullbackup. TODO Compress in that case too (tests need to be updated) @@ -1128,77 +1183,6 @@ impl PageServerHandler { .expect("claims presence already checked"); check_permission(claims, tenant_id).map_err(|e| QueryError::Unauthorized(e.0)) } - - /// Shorthand for getting a reference to a Timeline of an Active tenant. - async fn get_active_tenant_timeline( - &self, - tenant_id: TenantId, - timeline_id: TimelineId, - selector: ShardSelector, - ) -> Result, GetActiveTimelineError> { - let tenant = self - .get_active_tenant_with_timeout(tenant_id, selector, ACTIVE_TENANT_TIMEOUT) - .await - .map_err(GetActiveTimelineError::Tenant)?; - let timeline = tenant.get_timeline(timeline_id, true)?; - set_tracing_field_shard_id(&timeline); - Ok(timeline) - } - - /// Get a shard's [`Tenant`] in its active state, if present. If we don't find the shard and some - /// slots for this tenant are `InProgress` then we will wait. - /// If we find the [`Tenant`] and it's not yet in state [`TenantState::Active`], we will wait. - /// - /// `timeout` is used as a total timeout for the whole wait operation. - async fn get_active_tenant_with_timeout( - &self, - tenant_id: TenantId, - shard_selector: ShardSelector, - timeout: Duration, - ) -> Result, GetActiveTenantError> { - let wait_start = Instant::now(); - let deadline = wait_start + timeout; - - // Resolve TenantId to TenantShardId. This is usually a quick one-shot thing, the loop is - // for handling the rare case that the slot we're accessing is InProgress. - let tenant_shard = loop { - let resolved = self - .tenant_manager - .resolve_attached_shard(&tenant_id, shard_selector); - match resolved { - ShardResolveResult::Found(tenant_shard) => break tenant_shard, - ShardResolveResult::NotFound => { - return Err(GetActiveTenantError::NotFound(GetTenantError::NotFound( - tenant_id, - ))); - } - ShardResolveResult::InProgress(barrier) => { - // We can't authoritatively answer right now: wait for InProgress state - // to end, then try again - tokio::select! { - _ = self.await_connection_cancelled() => { - return Err(GetActiveTenantError::Cancelled) - }, - _ = barrier.wait() => { - // The barrier completed: proceed around the loop to try looking up again - }, - _ = tokio::time::sleep(deadline.duration_since(Instant::now())) => { - return Err(GetActiveTenantError::WaitForActiveTimeout { - latest_state: None, - wait_time: timeout, - }); - } - } - } - }; - }; - - tracing::debug!("Waiting for tenant to enter active state..."); - tenant_shard - .wait_to_become_active(deadline.duration_since(Instant::now())) - .await?; - Ok(tenant_shard) - } } #[async_trait::async_trait] @@ -1505,7 +1489,7 @@ impl From for QueryError { } #[derive(Debug, thiserror::Error)] -enum GetActiveTimelineError { +pub(crate) enum GetActiveTimelineError { #[error(transparent)] Tenant(GetActiveTenantError), #[error(transparent)] diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 48c1851a3a..5d0e963b4e 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -386,6 +386,8 @@ impl WalRedoManager { #[derive(Debug, thiserror::Error, PartialEq, Eq)] pub enum GetTimelineError { + #[error("Timeline is shutting down")] + ShuttingDown, #[error("Timeline {tenant_id}/{timeline_id} is not active, state: {state:?}")] NotActive { tenant_id: TenantShardId, diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 5e1f69f4c1..58f8990892 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -116,8 +116,6 @@ pub(crate) enum ShardSelector { /// Only return the 0th shard, if it is present. If a non-0th shard is present, /// ignore it. Zero, - /// Pick the first shard we find for the TenantId - First, /// Pick the shard that holds this key Page(Key), /// The shard ID is known: pick the given shard @@ -2088,7 +2086,6 @@ impl TenantManager { }; match selector { - ShardSelector::First => return ShardResolveResult::Found(tenant.clone()), ShardSelector::Zero if slot.0.shard_number == ShardNumber(0) => { return ShardResolveResult::Found(tenant.clone()) } @@ -2170,6 +2167,9 @@ pub(crate) enum GetActiveTenantError { /// never happen. #[error("Tenant is broken: {0}")] Broken(String), + + #[error("reconnect to switch tenant id")] + SwitchedTenant, } #[derive(Debug, thiserror::Error)] diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 4db44a3a19..ecae443079 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -3,6 +3,7 @@ pub(crate) mod compaction; pub mod delete; pub(crate) mod detach_ancestor; mod eviction_task; +pub(crate) mod handle; mod init; pub mod layer_manager; pub(crate) mod logical_size; @@ -17,6 +18,7 @@ use camino::Utf8Path; use chrono::{DateTime, Utc}; use enumset::EnumSet; use fail::fail_point; +use handle::ShardTimelineId; use once_cell::sync::Lazy; use pageserver_api::{ key::{ @@ -424,6 +426,8 @@ pub struct Timeline { pub(crate) extra_test_dense_keyspace: ArcSwap, pub(crate) l0_flush_global_state: L0FlushGlobalState, + + pub(crate) handles: handle::PerTimelineState, } pub struct WalReceiverInfo { @@ -1915,6 +1919,9 @@ impl Timeline { tracing::debug!("Cancelling CancellationToken"); self.cancel.cancel(); + // Ensure Prevent new page service requests from starting. + self.handles.shutdown(); + // Transition the remote_client into a state where it's only useful for timeline deletion. // (The deletion use case is why we can't just hook up remote_client to Self::cancel).) self.remote_client.stop(); @@ -2440,6 +2447,8 @@ impl Timeline { extra_test_dense_keyspace: ArcSwap::new(Arc::new(KeySpace::default())), l0_flush_global_state: resources.l0_flush_global_state, + + handles: Default::default(), }; result.repartition_threshold = result.get_checkpoint_distance() / REPARTITION_FREQ_IN_CHECKPOINT_DISTANCE; @@ -3709,6 +3718,17 @@ impl Timeline { &self.shard_identity } + #[inline(always)] + pub(crate) fn shard_timeline_id(&self) -> ShardTimelineId { + ShardTimelineId { + shard_index: ShardIndex { + shard_number: self.shard_identity.number, + shard_count: self.shard_identity.count, + }, + timeline_id: self.timeline_id, + } + } + /// /// Get a handle to the latest layer for appending. /// diff --git a/pageserver/src/tenant/timeline/handle.rs b/pageserver/src/tenant/timeline/handle.rs new file mode 100644 index 0000000000..e82559b8b3 --- /dev/null +++ b/pageserver/src/tenant/timeline/handle.rs @@ -0,0 +1,967 @@ +//! An efficient way to keep the timeline gate open without preventing +//! timeline shutdown for longer than a single call to a timeline method. +//! +//! # Motivation +//! +//! On a single page service connection, we're typically serving a single TenantTimelineId. +//! +//! Without sharding, there is a single Timeline object to which we dispatch +//! all requests. For example, a getpage request gets dispatched to the +//! Timeline::get method of the Timeline object that represents the +//! (tenant,timeline) of that connection. +//! +//! With sharding, for each request that comes in on the connection, +//! we first have to perform shard routing based on the requested key (=~ page number). +//! The result of shard routing is a Timeline object. +//! We then dispatch the request to that Timeline object. +//! +//! Regardless of whether the tenant is sharded or not, we want to ensure that +//! we hold the Timeline gate open while we're invoking the method on the +//! Timeline object. +//! +//! However, we want to avoid the overhead of entering the gate for every +//! method invocation. +//! +//! Further, for shard routing, we want to avoid calling the tenant manager to +//! resolve the shard for every request. Instead, we want to cache the +//! routing result so we can bypass the tenant manager for all subsequent requests +//! that get routed to that shard. +//! +//! Regardless of how we accomplish the above, it should not +//! prevent the Timeline from shutting down promptly. +//! +//! # Design +//! +//! There are three user-facing data structures: +//! - `PerTimelineState`: a struct embedded into each Timeline struct. Lifetime == Timeline lifetime. +//! - `Cache`: a struct private to each connection handler; Lifetime == connection lifetime. +//! - `Handle`: a smart pointer that holds the Timeline gate open and derefs to `&Timeline`. +//! Lifetime: for a single request dispatch on the Timeline (i.e., one getpage request) +//! +//! The `Handle` is just a wrapper around an `Arc`. +//! +//! There is one long-lived `Arc`, which is stored in the `PerTimelineState`. +//! The `Cache` stores a `Weak` for each cached Timeline. +//! +//! To dispatch a request, the page service connection calls `Cache::get`. +//! +//! A cache miss means we consult the tenant manager for shard routing, +//! resulting in an `Arc`. We enter its gate _once_ and construct an +//! `Arc`. We store a `Weak` in the cache +//! and the `Arc` in the `PerTimelineState`. +//! +//! For subsequent requests, `Cache::get` will perform a "fast path" shard routing +//! and find the `Weak` in the cache. +//! We upgrade the `Weak` to an `Arc` and wrap it in the user-facing `Handle` type. +//! +//! The request handler dispatches the request to the right `>::$request_method`. +//! It then drops the `Handle`, which drops the `Arc`. +//! +//! # Memory Management / How The Reference Cycle Is Broken +//! +//! The attentive reader may have noticed the strong reference cycle +//! from `Arc` to `PerTimelineState` to `Arc`. +//! +//! This cycle is intentional: while it exists, the `Cache` can upgrade its +//! `Weak` to an `Arc` in a single atomic operation. +//! +//! The cycle is broken by either +//! - `PerTimelineState::shutdown` or +//! - dropping the `Cache`. +//! +//! Concurrently existing `Handle`s will extend the existence of the cycle. +//! However, since `Handle`s are short-lived and new `Handle`s are not +//! handed out after either `PerTimelineState::shutdown` or `Cache` drop, +//! that extension of the cycle is bounded. +//! +//! # Fast Path for Shard Routing +//! +//! The `Cache` has a fast path for shard routing to avoid calling into +//! the tenant manager for every request. +//! +//! The `Cache` maintains a hash map of `ShardTimelineId` to `Weak`. +//! +//! The current implementation uses the first entry in the hash map +//! to determine the `ShardParameters` and derive the correct +//! `ShardIndex` for the requested key. +//! +//! It then looks up the hash map for that `ShardTimelineId := {ShardIndex,TimelineId}`. +//! +//! If the lookup is successful and the `Weak` can be upgraded, +//! it's a hit. +//! +//! ## Cache invalidation +//! +//! The insight is that cache invalidation is sufficient and most efficiently done lazily. +//! The only reasons why an entry in the cache can become stale are: +//! 1. The `PerTimelineState` / Timeline is shutting down e.g. because the shard is +//! being detached, timeline or shard deleted, or pageserver is shutting down. +//! 2. We're doing a shard split and new traffic should be routed to the child shards. +//! +//! Regarding (1), we will eventually fail to upgrade the `Weak` once the +//! timeline has shut down, and when that happens, we remove the entry from the cache. +//! +//! Regarding (2), the insight is that it is toally fine to keep dispatching requests +//! to the parent shard during a shard split. Eventually, the shard split task will +//! shut down the parent => case (1). + +use std::collections::hash_map; +use std::collections::HashMap; +use std::sync::atomic::AtomicBool; +use std::sync::atomic::Ordering; +use std::sync::Arc; +use std::sync::Mutex; +use std::sync::Weak; + +use pageserver_api::shard::ShardIdentity; +use tracing::instrument; +use tracing::trace; +use utils::id::TimelineId; +use utils::shard::ShardIndex; +use utils::shard::ShardNumber; + +use crate::tenant::mgr::ShardSelector; + +/// The requirement for Debug is so that #[derive(Debug)] works in some places. +pub(crate) trait Types: Sized + std::fmt::Debug { + type TenantManagerError: Sized + std::fmt::Debug; + type TenantManager: TenantManager + Sized; + type Timeline: ArcTimeline + Sized; +} + +/// Uniquely identifies a [`Cache`] instance over the lifetime of the process. +/// Required so [`Cache::drop`] can take out the handles from the [`PerTimelineState`]. +/// Alternative to this would be to allocate [`Cache`] in a `Box` and identify it by the pointer. +#[derive(Debug, Hash, PartialEq, Eq, Clone, Copy)] +struct CacheId(u64); + +impl CacheId { + fn next() -> Self { + static NEXT_ID: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1); + let id = NEXT_ID.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if id == 0 { + panic!("CacheId::new() returned 0, overflow"); + } + Self(id) + } +} + +/// See module-level comment. +pub(crate) struct Cache { + id: CacheId, + map: Map, +} + +type Map = HashMap>>; + +impl Default for Cache { + fn default() -> Self { + Self { + id: CacheId::next(), + map: Default::default(), + } + } +} + +#[derive(PartialEq, Eq, Debug, Hash, Clone, Copy)] +pub(crate) struct ShardTimelineId { + pub(crate) shard_index: ShardIndex, + pub(crate) timeline_id: TimelineId, +} + +/// See module-level comment. +pub(crate) struct Handle(Arc>); +struct HandleInner { + shut_down: AtomicBool, + timeline: T::Timeline, + // The timeline's gate held open. + _gate_guard: utils::sync::gate::GateGuard, +} + +/// Embedded in each [`Types::Timeline`] as the anchor for the only long-lived strong ref to `HandleInner`. +/// +/// See module-level comment for details. +pub struct PerTimelineState { + // None = shutting down + handles: Mutex>>>>, +} + +impl Default for PerTimelineState { + fn default() -> Self { + Self { + handles: Mutex::new(Some(Default::default())), + } + } +} + +/// Abstract view of [`crate::tenant::mgr`], for testability. +pub(crate) trait TenantManager { + /// Invoked by [`Cache::get`] to resolve a [`ShardTimelineId`] to a [`Types::Timeline`]. + /// Errors are returned as [`GetError::TenantManager`]. + async fn resolve( + &self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + ) -> Result; +} + +/// Abstract view of an [`Arc`], for testability. +pub(crate) trait ArcTimeline: Clone { + fn gate(&self) -> &utils::sync::gate::Gate; + fn shard_timeline_id(&self) -> ShardTimelineId; + fn get_shard_identity(&self) -> &ShardIdentity; + fn per_timeline_state(&self) -> &PerTimelineState; +} + +/// Errors returned by [`Cache::get`]. +#[derive(Debug)] +pub(crate) enum GetError { + TenantManager(T::TenantManagerError), + TimelineGateClosed, + PerTimelineStateShutDown, +} + +/// Internal type used in [`Cache::get`]. +enum RoutingResult { + FastPath(Handle), + SlowPath(ShardTimelineId), + NeedConsultTenantManager, +} + +impl Cache { + /// See module-level comment for details. + /// + /// Does NOT check for the shutdown state of [`Types::Timeline`]. + /// Instead, the methods of [`Types::Timeline`] that are invoked through + /// the [`Handle`] are responsible for checking these conditions + /// and if so, return an error that causes the page service to + /// close the connection. + #[instrument(level = "trace", skip_all)] + pub(crate) async fn get( + &mut self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + tenant_manager: &T::TenantManager, + ) -> Result, GetError> { + // terminates because each iteration removes an element from the map + loop { + let handle = self + .get_impl(timeline_id, shard_selector, tenant_manager) + .await?; + if handle.0.shut_down.load(Ordering::Relaxed) { + let removed = self + .map + .remove(&handle.0.timeline.shard_timeline_id()) + .expect("invariant of get_impl is that the returned handle is in the map"); + assert!( + Weak::ptr_eq(&removed, &Arc::downgrade(&handle.0)), + "shard_timeline_id() incorrect?" + ); + } else { + return Ok(handle); + } + } + } + + #[instrument(level = "trace", skip_all)] + async fn get_impl( + &mut self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + tenant_manager: &T::TenantManager, + ) -> Result, GetError> { + let miss: ShardSelector = { + let routing_state = self.shard_routing(timeline_id, shard_selector); + match routing_state { + RoutingResult::FastPath(handle) => return Ok(handle), + RoutingResult::SlowPath(key) => match self.map.get(&key) { + Some(cached) => match cached.upgrade() { + Some(upgraded) => return Ok(Handle(upgraded)), + None => { + trace!("handle cache stale"); + self.map.remove(&key).unwrap(); + ShardSelector::Known(key.shard_index) + } + }, + None => ShardSelector::Known(key.shard_index), + }, + RoutingResult::NeedConsultTenantManager => shard_selector, + } + }; + self.get_miss(timeline_id, miss, tenant_manager).await + } + + #[inline(always)] + fn shard_routing( + &mut self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + ) -> RoutingResult { + loop { + // terminates because when every iteration we remove an element from the map + let Some((first_key, first_handle)) = self.map.iter().next() else { + return RoutingResult::NeedConsultTenantManager; + }; + let Some(first_handle) = first_handle.upgrade() else { + // TODO: dedup with get() + trace!("handle cache stale"); + let first_key_owned = *first_key; + self.map.remove(&first_key_owned).unwrap(); + continue; + }; + + let first_handle_shard_identity = first_handle.timeline.get_shard_identity(); + let make_shard_index = |shard_num: ShardNumber| ShardIndex { + shard_number: shard_num, + shard_count: first_handle_shard_identity.count, + }; + + let need_idx = match shard_selector { + ShardSelector::Page(key) => { + make_shard_index(first_handle_shard_identity.get_shard_number(&key)) + } + ShardSelector::Zero => make_shard_index(ShardNumber(0)), + ShardSelector::Known(shard_idx) => shard_idx, + }; + let need_shard_timeline_id = ShardTimelineId { + shard_index: need_idx, + timeline_id, + }; + let first_handle_shard_timeline_id = ShardTimelineId { + shard_index: first_handle_shard_identity.shard_index(), + timeline_id: first_handle.timeline.shard_timeline_id().timeline_id, + }; + + if need_shard_timeline_id == first_handle_shard_timeline_id { + return RoutingResult::FastPath(Handle(first_handle)); + } else { + return RoutingResult::SlowPath(need_shard_timeline_id); + } + } + } + + #[instrument(level = "trace", skip_all)] + #[inline(always)] + async fn get_miss( + &mut self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + tenant_manager: &T::TenantManager, + ) -> Result, GetError> { + match tenant_manager.resolve(timeline_id, shard_selector).await { + Ok(timeline) => { + let key = timeline.shard_timeline_id(); + match &shard_selector { + ShardSelector::Zero => assert_eq!(key.shard_index.shard_number, ShardNumber(0)), + ShardSelector::Page(_) => (), // gotta trust tenant_manager + ShardSelector::Known(idx) => assert_eq!(idx, &key.shard_index), + } + + let gate_guard = match timeline.gate().enter() { + Ok(guard) => guard, + Err(_) => { + return Err(GetError::TimelineGateClosed); + } + }; + trace!("creating new HandleInner"); + let handle = Arc::new( + // TODO: global metric that keeps track of the number of live HandlerTimeline instances + // so we can identify reference cycle bugs. + HandleInner { + shut_down: AtomicBool::new(false), + _gate_guard: gate_guard, + timeline: timeline.clone(), + }, + ); + let handle = { + let mut lock_guard = timeline + .per_timeline_state() + .handles + .lock() + .expect("mutex poisoned"); + match &mut *lock_guard { + Some(per_timeline_state) => { + let replaced = per_timeline_state.insert(self.id, Arc::clone(&handle)); + assert!(replaced.is_none(), "some earlier code left a stale handle"); + match self.map.entry(key) { + hash_map::Entry::Occupied(_o) => { + // This cannot not happen because + // 1. we're the _miss_ handle, i.e., `self.map` didn't contain an entry and + // 2. we were holding &mut self during .resolve().await above, so, no other thread can have inserted a handle + // while we were waiting for the tenant manager. + unreachable!() + } + hash_map::Entry::Vacant(v) => { + v.insert(Arc::downgrade(&handle)); + handle + } + } + } + None => { + return Err(GetError::PerTimelineStateShutDown); + } + } + }; + Ok(Handle(handle)) + } + Err(e) => Err(GetError::TenantManager(e)), + } + } +} + +impl PerTimelineState { + /// After this method returns, [`Cache::get`] will never again return a [`Handle`] + /// to the [`Types::Timeline`] that embeds this per-timeline state. + /// Even if [`TenantManager::resolve`] would still resolve to it. + /// + /// Already-alive [`Handle`]s for will remain open, usable, and keeping the [`ArcTimeline`] alive. + /// That's ok because they're short-lived. See module-level comment for details. + #[instrument(level = "trace", skip_all)] + pub(super) fn shutdown(&self) { + let handles = self + .handles + .lock() + .expect("mutex poisoned") + // NB: this .take() sets locked to None. + // That's what makes future `Cache::get` misses fail. + // Cache hits are taken care of below. + .take(); + let Some(handles) = handles else { + trace!("already shut down"); + return; + }; + for handle in handles.values() { + // Make hits fail. + handle.shut_down.store(true, Ordering::Relaxed); + } + drop(handles); + } +} + +impl std::ops::Deref for Handle { + type Target = T::Timeline; + fn deref(&self) -> &Self::Target { + &self.0.timeline + } +} + +#[cfg(test)] +impl Drop for HandleInner { + fn drop(&mut self) { + trace!("HandleInner dropped"); + } +} + +// When dropping a [`Cache`], prune its handles in the [`PerTimelineState`] to break the reference cycle. +impl Drop for Cache { + fn drop(&mut self) { + for (_, weak) in self.map.drain() { + if let Some(strong) = weak.upgrade() { + // handle is still being kept alive in PerTimelineState + let timeline = strong.timeline.per_timeline_state(); + let mut handles = timeline.handles.lock().expect("mutex poisoned"); + if let Some(handles) = &mut *handles { + let Some(removed) = handles.remove(&self.id) else { + // There could have been a shutdown inbetween us upgrading the weak and locking the mutex. + continue; + }; + assert!(Arc::ptr_eq(&removed, &strong)); + } + } + } + } +} + +#[cfg(test)] +mod tests { + use pageserver_api::{ + key::{rel_block_to_key, Key, DBDIR_KEY}, + models::ShardParameters, + reltag::RelTag, + shard::ShardStripeSize, + }; + use utils::shard::ShardCount; + + use super::*; + + const FOREVER: std::time::Duration = std::time::Duration::from_secs(u64::MAX); + + #[derive(Debug)] + struct TestTypes; + impl Types for TestTypes { + type TenantManagerError = anyhow::Error; + type TenantManager = StubManager; + type Timeline = Arc; + } + + struct StubManager { + shards: Vec>, + } + + struct StubTimeline { + gate: utils::sync::gate::Gate, + id: TimelineId, + shard: ShardIdentity, + per_timeline_state: PerTimelineState, + myself: Weak, + } + + impl StubTimeline { + fn getpage(&self) { + // do nothing + } + } + + impl ArcTimeline for Arc { + fn gate(&self) -> &utils::sync::gate::Gate { + &self.gate + } + + fn shard_timeline_id(&self) -> ShardTimelineId { + ShardTimelineId { + shard_index: self.shard.shard_index(), + timeline_id: self.id, + } + } + + fn get_shard_identity(&self) -> &ShardIdentity { + &self.shard + } + + fn per_timeline_state(&self) -> &PerTimelineState { + &self.per_timeline_state + } + } + + impl TenantManager for StubManager { + async fn resolve( + &self, + timeline_id: TimelineId, + shard_selector: ShardSelector, + ) -> anyhow::Result> { + for timeline in &self.shards { + if timeline.id == timeline_id { + match &shard_selector { + ShardSelector::Zero if timeline.shard.is_shard_zero() => { + return Ok(Arc::clone(timeline)); + } + ShardSelector::Zero => continue, + ShardSelector::Page(key) if timeline.shard.is_key_local(key) => { + return Ok(Arc::clone(timeline)); + } + ShardSelector::Page(_) => continue, + ShardSelector::Known(idx) if idx == &timeline.shard.shard_index() => { + return Ok(Arc::clone(timeline)); + } + ShardSelector::Known(_) => continue, + } + } + } + anyhow::bail!("not found") + } + } + + #[tokio::test(start_paused = true)] + async fn test_timeline_shutdown() { + crate::tenant::harness::setup_logging(); + + let timeline_id = TimelineId::generate(); + let shard0 = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_id, + shard: ShardIdentity::unsharded(), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let mgr = StubManager { + shards: vec![shard0.clone()], + }; + let key = DBDIR_KEY; + + let mut cache = Cache::::default(); + + // + // fill the cache + // + assert_eq!( + (Arc::strong_count(&shard0), Arc::weak_count(&shard0)), + (2, 1), + "strong: shard0, mgr; weak: myself" + ); + + let handle: Handle<_> = cache + .get(timeline_id, ShardSelector::Page(key), &mgr) + .await + .expect("we have the timeline"); + let handle_inner_weak = Arc::downgrade(&handle.0); + assert!(Weak::ptr_eq(&handle.myself, &shard0.myself)); + assert_eq!( + ( + Weak::strong_count(&handle_inner_weak), + Weak::weak_count(&handle_inner_weak) + ), + (2, 2), + "strong: handle, per_timeline_state, weak: handle_inner_weak, cache" + ); + assert_eq!(cache.map.len(), 1); + + assert_eq!( + (Arc::strong_count(&shard0), Arc::weak_count(&shard0)), + (3, 1), + "strong: handleinner(per_timeline_state), shard0, mgr; weak: myself" + ); + drop(handle); + assert_eq!( + (Arc::strong_count(&shard0), Arc::weak_count(&shard0)), + (3, 1), + "strong: handleinner(per_timeline_state), shard0, mgr; weak: myself" + ); + + // + // demonstrate that Handle holds up gate closure + // but shutdown prevents new handles from being handed out + // + + tokio::select! { + _ = shard0.gate.close() => { + panic!("cache and per-timeline handler state keep cache open"); + } + _ = tokio::time::sleep(FOREVER) => { + // NB: first poll of close() makes it enter closing state + } + } + + let handle = cache + .get(timeline_id, ShardSelector::Page(key), &mgr) + .await + .expect("we have the timeline"); + assert!(Weak::ptr_eq(&handle.myself, &shard0.myself)); + + // SHUTDOWN + shard0.per_timeline_state.shutdown(); // keeping handle alive across shutdown + + assert_eq!( + 1, + Weak::strong_count(&handle_inner_weak), + "through local var handle" + ); + assert_eq!( + cache.map.len(), + 1, + "this is an implementation detail but worth pointing out: we can't clear the cache from shutdown(), it's cleared on first access after" + ); + assert_eq!( + (Arc::strong_count(&shard0), Arc::weak_count(&shard0)), + (3, 1), + "strong: handleinner(via handle), shard0, mgr; weak: myself" + ); + + // this handle is perfectly usable + handle.getpage(); + + cache + .get(timeline_id, ShardSelector::Page(key), &mgr) + .await + .err() + .expect("documented behavior: can't get new handle after shutdown, even if there is an alive Handle"); + assert_eq!( + cache.map.len(), + 0, + "first access after shutdown cleans up the Weak's from the cache" + ); + + tokio::select! { + _ = shard0.gate.close() => { + panic!("handle is keeping gate open"); + } + _ = tokio::time::sleep(FOREVER) => { } + } + + drop(handle); + assert_eq!( + 0, + Weak::strong_count(&handle_inner_weak), + "the HandleInner destructor already ran" + ); + assert_eq!( + (Arc::strong_count(&shard0), Arc::weak_count(&shard0)), + (2, 1), + "strong: shard0, mgr; weak: myself" + ); + + // closing gate succeeds after dropping handle + tokio::select! { + _ = shard0.gate.close() => { } + _ = tokio::time::sleep(FOREVER) => { + panic!("handle is dropped, no other gate holders exist") + } + } + + // map gets cleaned on next lookup + cache + .get(timeline_id, ShardSelector::Page(key), &mgr) + .await + .err() + .expect("documented behavior: can't get new handle after shutdown"); + assert_eq!(cache.map.len(), 0); + + // ensure all refs to shard0 are gone and we're not leaking anything + let myself = Weak::clone(&shard0.myself); + drop(shard0); + drop(mgr); + assert_eq!(Weak::strong_count(&myself), 0); + } + + #[tokio::test] + async fn test_multiple_timelines_and_deletion() { + crate::tenant::harness::setup_logging(); + + let timeline_a = TimelineId::generate(); + let timeline_b = TimelineId::generate(); + assert_ne!(timeline_a, timeline_b); + let timeline_a = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_a, + shard: ShardIdentity::unsharded(), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let timeline_b = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_b, + shard: ShardIdentity::unsharded(), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let mut mgr = StubManager { + shards: vec![timeline_a.clone(), timeline_b.clone()], + }; + let key = DBDIR_KEY; + + let mut cache = Cache::::default(); + + cache + .get(timeline_a.id, ShardSelector::Page(key), &mgr) + .await + .expect("we have it"); + cache + .get(timeline_b.id, ShardSelector::Page(key), &mgr) + .await + .expect("we have it"); + assert_eq!(cache.map.len(), 2); + + // delete timeline A + timeline_a.per_timeline_state.shutdown(); + mgr.shards.retain(|t| t.id != timeline_a.id); + assert!( + mgr.resolve(timeline_a.id, ShardSelector::Page(key)) + .await + .is_err(), + "broken StubManager implementation" + ); + + assert_eq!( + cache.map.len(), + 2, + "cache still has a Weak handle to Timeline A" + ); + cache + .get(timeline_a.id, ShardSelector::Page(key), &mgr) + .await + .err() + .expect("documented behavior: can't get new handle after shutdown"); + assert_eq!(cache.map.len(), 1, "next access cleans up the cache"); + + cache + .get(timeline_b.id, ShardSelector::Page(key), &mgr) + .await + .expect("we still have it"); + } + + fn make_relation_key_for_shard(shard: ShardNumber, params: &ShardParameters) -> Key { + rel_block_to_key( + RelTag { + spcnode: 1663, + dbnode: 208101, + relnode: 2620, + forknum: 0, + }, + shard.0 as u32 * params.stripe_size.0, + ) + } + + #[tokio::test(start_paused = true)] + async fn test_shard_split() { + crate::tenant::harness::setup_logging(); + let timeline_id = TimelineId::generate(); + let parent = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_id, + shard: ShardIdentity::unsharded(), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let child_params = ShardParameters { + count: ShardCount(2), + stripe_size: ShardStripeSize::default(), + }; + let child0 = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_id, + shard: ShardIdentity::from_params(ShardNumber(0), &child_params), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let child1 = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_id, + shard: ShardIdentity::from_params(ShardNumber(1), &child_params), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let child_shards_by_shard_number = [child0.clone(), child1.clone()]; + + let mut cache = Cache::::default(); + + // fill the cache with the parent + for i in 0..2 { + let handle = cache + .get( + timeline_id, + ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)), + &StubManager { + shards: vec![parent.clone()], + }, + ) + .await + .expect("we have it"); + assert!( + Weak::ptr_eq(&handle.myself, &parent.myself), + "mgr returns parent first" + ); + drop(handle); + } + + // + // SHARD SPLIT: tenant manager changes, but the cache isn't informed + // + + // while we haven't shut down the parent, the cache will return the cached parent, even + // if the tenant manager returns the child + for i in 0..2 { + let handle = cache + .get( + timeline_id, + ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)), + &StubManager { + shards: vec![], // doesn't matter what's in here, the cache is fully loaded + }, + ) + .await + .expect("we have it"); + assert!( + Weak::ptr_eq(&handle.myself, &parent.myself), + "mgr returns parent" + ); + drop(handle); + } + + let parent_handle = cache + .get( + timeline_id, + ShardSelector::Page(make_relation_key_for_shard(ShardNumber(0), &child_params)), + &StubManager { + shards: vec![parent.clone()], + }, + ) + .await + .expect("we have it"); + assert!(Weak::ptr_eq(&parent_handle.myself, &parent.myself)); + + // invalidate the cache + parent.per_timeline_state.shutdown(); + + // the cache will now return the child, even though the parent handle still exists + for i in 0..2 { + let handle = cache + .get( + timeline_id, + ShardSelector::Page(make_relation_key_for_shard(ShardNumber(i), &child_params)), + &StubManager { + shards: vec![child0.clone(), child1.clone()], // <====== this changed compared to previous loop + }, + ) + .await + .expect("we have it"); + assert!( + Weak::ptr_eq( + &handle.myself, + &child_shards_by_shard_number[i as usize].myself + ), + "mgr returns child" + ); + drop(handle); + } + + // all the while the parent handle kept the parent gate open + tokio::select! { + _ = parent_handle.gate.close() => { + panic!("parent handle is keeping gate open"); + } + _ = tokio::time::sleep(FOREVER) => { } + } + drop(parent_handle); + tokio::select! { + _ = parent.gate.close() => { } + _ = tokio::time::sleep(FOREVER) => { + panic!("parent handle is dropped, no other gate holders exist") + } + } + } + + #[tokio::test(start_paused = true)] + async fn test_connection_handler_exit() { + crate::tenant::harness::setup_logging(); + let timeline_id = TimelineId::generate(); + let shard0 = Arc::new_cyclic(|myself| StubTimeline { + gate: Default::default(), + id: timeline_id, + shard: ShardIdentity::unsharded(), + per_timeline_state: PerTimelineState::default(), + myself: myself.clone(), + }); + let mgr = StubManager { + shards: vec![shard0.clone()], + }; + let key = DBDIR_KEY; + + // Simulate 10 connections that's opened, used, and closed + let mut used_handles = vec![]; + for _ in 0..10 { + let mut cache = Cache::::default(); + let handle = { + let handle = cache + .get(timeline_id, ShardSelector::Page(key), &mgr) + .await + .expect("we have the timeline"); + assert!(Weak::ptr_eq(&handle.myself, &shard0.myself)); + handle + }; + handle.getpage(); + used_handles.push(Arc::downgrade(&handle.0)); + } + + // No handles exist, thus gates are closed and don't require shutdown + assert!(used_handles + .iter() + .all(|weak| Weak::strong_count(weak) == 0)); + + // ... thus the gate should close immediately, even without shutdown + tokio::select! { + _ = shard0.gate.close() => { } + _ = tokio::time::sleep(FOREVER) => { + panic!("handle is dropped, no other gate holders exist") + } + } + } +} From a006f7656e29223278523c7712c298dfbdee9efa Mon Sep 17 00:00:00 2001 From: Tristan Partin Date: Thu, 25 Jul 2024 15:45:15 -0500 Subject: [PATCH 19/37] Fix negative replication delay metric In some cases, we can get a negative metric for replication_delay_bytes. My best guess from all the research I've done is that we evaluate pg_last_wal_receive_lsn() before pg_last_wal_replay_lsn(), and that by the time everything is said and done, the replay LSN has advanced past the receive LSN. In this case, our lag can effectively be modeled as 0 due to the speed of the WAL reception and replay. --- vm-image-spec.yaml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vm-image-spec.yaml b/vm-image-spec.yaml index 2767710bad..7d005c7139 100644 --- a/vm-image-spec.yaml +++ b/vm-image-spec.yaml @@ -277,8 +277,12 @@ files: help: 'Bytes between received and replayed LSN' key_labels: values: [replication_delay_bytes] + # We use a GREATEST call here because this calculation can be negative. + # The calculation is not atomic, meaning after we've gotten the receive + # LSN, the replay LSN may have advanced past the receive LSN we + # are using for the calculation. query: | - SELECT pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn()) AS replication_delay_bytes; + SELECT GREATEST(0, pg_wal_lsn_diff(pg_last_wal_receive_lsn(), pg_last_wal_replay_lsn())) AS replication_delay_bytes; - metric_name: replication_delay_seconds type: gauge From 6c1bbe8434c299b2da588f3d26a40dbae9de11e8 Mon Sep 17 00:00:00 2001 From: Cihan Demirci <128653800+fcdm@users.noreply.github.com> Date: Wed, 31 Jul 2024 19:42:10 +0300 Subject: [PATCH 20/37] cicd: change Azure storage details [2/2] (#8562) Change Azure storage configuration to point to updated variables/secrets. Also update subscription id variable. --- .github/actionlint.yml | 1 - .github/workflows/_build-and-test-locally.yml | 6 +++--- .github/workflows/build_and_test.yml | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/actionlint.yml b/.github/actionlint.yml index f086008d34..37983798b7 100644 --- a/.github/actionlint.yml +++ b/.github/actionlint.yml @@ -9,6 +9,5 @@ self-hosted-runner: - us-east-2 config-variables: - REMOTE_STORAGE_AZURE_CONTAINER - - REMOTE_STORAGE_AZURE_CONTAINER_NEW - REMOTE_STORAGE_AZURE_REGION - SLACK_UPCOMING_RELEASE_CHANNEL_ID diff --git a/.github/workflows/_build-and-test-locally.yml b/.github/workflows/_build-and-test-locally.yml index 182e96a8ca..a0ed169024 100644 --- a/.github/workflows/_build-and-test-locally.yml +++ b/.github/workflows/_build-and-test-locally.yml @@ -223,9 +223,9 @@ jobs: # Run separate tests for real Azure Blob Storage # XXX: replace region with `eu-central-1`-like region export ENABLE_REAL_AZURE_REMOTE_STORAGE=y - export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV_NEW }}" - export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV_NEW }}" - export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER_NEW }}" + export AZURE_STORAGE_ACCOUNT="${{ secrets.AZURE_STORAGE_ACCOUNT_DEV }}" + export AZURE_STORAGE_ACCESS_KEY="${{ secrets.AZURE_STORAGE_ACCESS_KEY_DEV }}" + export REMOTE_STORAGE_AZURE_CONTAINER="${{ vars.REMOTE_STORAGE_AZURE_CONTAINER }}" export REMOTE_STORAGE_AZURE_REGION="${{ vars.REMOTE_STORAGE_AZURE_REGION }}" ${cov_prefix} cargo nextest run $CARGO_FLAGS $CARGO_FEATURES -E 'package(remote_storage)' -E 'test(test_real_azure)' diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index c4df98f585..50006dd3d4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -871,7 +871,7 @@ jobs: with: client-id: ${{ secrets.AZURE_DEV_CLIENT_ID }} tenant-id: ${{ secrets.AZURE_TENANT_ID }} - subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }} + subscription-id: ${{ secrets.AZURE_DEV_SUBSCRIPTION_ID }} - name: Login to ACR if: github.ref_name == 'main' From b3a76d9601b6b4cf45739426bfc53611804a08ed Mon Sep 17 00:00:00 2001 From: John Spray Date: Wed, 31 Jul 2024 18:37:47 +0100 Subject: [PATCH 21/37] controller: simplify reconciler generation increment logic (#8560) ## Problem This code was confusing, untested and covered: - an impossible case, where intent state is AttacheStale (we never do this) - a rare edge case (going from AttachedMulti to Attached), which we were not testing, and in any case the pageserver internally does the same Tenant reset in this transition as it would do if we incremented generation. Closes: https://github.com/neondatabase/neon/issues/8367 ## Summary of changes - Simplify the logic to only skip incrementing the generation if the location already has the expected generation and the exact same mode. --- storage_controller/src/reconciler.rs | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/storage_controller/src/reconciler.rs b/storage_controller/src/reconciler.rs index 12dea2c7ef..254fdb364e 100644 --- a/storage_controller/src/reconciler.rs +++ b/storage_controller/src/reconciler.rs @@ -656,11 +656,8 @@ impl Reconciler { // reconcile this location. This includes locations with different configurations, as well // as locations with unknown (None) observed state. - // The general case is to increment the generation. However, there are cases - // where this is not necessary: - // - if we are only updating the TenantConf part of the location - // - if we are only changing the attachment mode (e.g. going to attachedmulti or attachedstale) - // and the location was already in the correct generation + // Incrementing generation is the safe general case, but is inefficient for changes that only + // modify some details (e.g. the tenant's config). let increment_generation = match observed { None => true, Some(ObservedStateLocation { conf: None }) => true, @@ -669,18 +666,11 @@ impl Reconciler { }) => { let generations_match = observed.generation == wanted_conf.generation; - use LocationConfigMode::*; - let mode_transition_requires_gen_inc = - match (observed.mode, wanted_conf.mode) { - // Usually the short-lived attachment modes (multi and stale) are only used - // in the case of [`Self::live_migrate`], but it is simple to handle them correctly - // here too. Locations are allowed to go Single->Stale and Multi->Single within the same generation. - (AttachedSingle, AttachedStale) => false, - (AttachedMulti, AttachedSingle) => false, - (lhs, rhs) => lhs != rhs, - }; - - !generations_match || mode_transition_requires_gen_inc + // We may skip incrementing the generation if the location is already in the expected mode and + // generation. In principle it would also be safe to skip from certain other modes (e.g. AttachedStale), + // but such states are handled inside `live_migrate`, and if we see that state here we're cleaning up + // after a restart/crash, so fall back to the universally safe path of incrementing generation. + !generations_match || (observed.mode != wanted_conf.mode) } }; From 46379cd3f213a99b02a3944d45186447eb2bb696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Wed, 31 Jul 2024 20:24:42 +0200 Subject: [PATCH 22/37] storage_scrubber: migrate FindGarbage to remote_storage (#8548) Uses the newly added APIs from #8541 named `stream_tenants_generic` and `stream_objects_with_retries` and extends them with `list_objects_with_retries_generic` and `stream_tenant_timelines_generic` to migrate the `find-garbage` command of the scrubber to `GenericRemoteStorage`. Part of https://github.com/neondatabase/neon/issues/7547 --- libs/remote_storage/src/lib.rs | 1 + storage_scrubber/src/garbage.rs | 50 ++++++++++------------ storage_scrubber/src/lib.rs | 40 +++++++++++++++++ storage_scrubber/src/metadata_stream.rs | 57 +++++++++++++++++++++++++ 4 files changed, 121 insertions(+), 27 deletions(-) diff --git a/libs/remote_storage/src/lib.rs b/libs/remote_storage/src/lib.rs index 794e696769..2c9e298f79 100644 --- a/libs/remote_storage/src/lib.rs +++ b/libs/remote_storage/src/lib.rs @@ -144,6 +144,7 @@ impl RemotePath { /// /// The WithDelimiter mode will populate `prefixes` and `keys` in the result. The /// NoDelimiter mode will only populate `keys`. +#[derive(Copy, Clone)] pub enum ListingMode { WithDelimiter, NoDelimiter, diff --git a/storage_scrubber/src/garbage.rs b/storage_scrubber/src/garbage.rs index 73479c3658..d6a73bf366 100644 --- a/storage_scrubber/src/garbage.rs +++ b/storage_scrubber/src/garbage.rs @@ -19,8 +19,8 @@ use utils::id::TenantId; use crate::{ cloud_admin_api::{CloudAdminApiClient, MaybeDeleted, ProjectData}, - init_remote, init_remote_generic, list_objects_with_retries, - metadata_stream::{stream_tenant_timelines, stream_tenants}, + init_remote_generic, list_objects_with_retries_generic, + metadata_stream::{stream_tenant_timelines_generic, stream_tenants_generic}, BucketConfig, ConsoleConfig, NodeKind, TenantShardTimelineId, TraversingDepth, }; @@ -153,7 +153,7 @@ async fn find_garbage_inner( node_kind: NodeKind, ) -> anyhow::Result { // Construct clients for S3 and for Console API - let (s3_client, target) = init_remote(bucket_config.clone(), node_kind).await?; + let (remote_client, target) = init_remote_generic(bucket_config.clone(), node_kind).await?; let cloud_admin_api_client = Arc::new(CloudAdminApiClient::new(console_config)); // Build a set of console-known tenants, for quickly eliminating known-active tenants without having @@ -179,7 +179,7 @@ async fn find_garbage_inner( // Enumerate Tenants in S3, and check if each one exists in Console tracing::info!("Finding all tenants in bucket {}...", bucket_config.bucket); - let tenants = stream_tenants(&s3_client, &target); + let tenants = stream_tenants_generic(&remote_client, &target); let tenants_checked = tenants.map_ok(|t| { let api_client = cloud_admin_api_client.clone(); let console_cache = console_cache.clone(); @@ -237,25 +237,26 @@ async fn find_garbage_inner( // Special case: If it's missing in console, check for known bugs that would enable us to conclusively // identify it as purge-able anyway if console_result.is_none() { - let timelines = stream_tenant_timelines(&s3_client, &target, tenant_shard_id) - .await? - .collect::>() - .await; + let timelines = + stream_tenant_timelines_generic(&remote_client, &target, tenant_shard_id) + .await? + .collect::>() + .await; if timelines.is_empty() { // No timelines, but a heatmap: the deletion bug where we deleted everything but heatmaps - let tenant_objects = list_objects_with_retries( - &s3_client, + let tenant_objects = list_objects_with_retries_generic( + &remote_client, + ListingMode::WithDelimiter, &target.tenant_root(&tenant_shard_id), - None, ) .await?; - let object = tenant_objects.contents.as_ref().unwrap().first().unwrap(); - if object.key.as_ref().unwrap().ends_with("heatmap-v1.json") { + let object = tenant_objects.keys.first().unwrap(); + if object.key.get_path().as_str().ends_with("heatmap-v1.json") { tracing::info!("Tenant {tenant_shard_id}: is missing in console and is only a heatmap (known historic deletion bug)"); garbage.append_buggy(GarbageEntity::Tenant(tenant_shard_id)); continue; } else { - tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key.as_ref().unwrap()); + tracing::info!("Tenant {tenant_shard_id} is missing in console and contains one object: {}", object.key); } } else { // A console-unknown tenant with timelines: check if these timelines only contain initdb.tar.zst, from the initial @@ -264,24 +265,18 @@ async fn find_garbage_inner( for timeline_r in timelines { let timeline = timeline_r?; - let timeline_objects = list_objects_with_retries( - &s3_client, + let timeline_objects = list_objects_with_retries_generic( + &remote_client, + ListingMode::WithDelimiter, &target.timeline_root(&timeline), - None, ) .await?; - if timeline_objects - .common_prefixes - .as_ref() - .map(|v| v.len()) - .unwrap_or(0) - > 0 - { + if !timeline_objects.prefixes.is_empty() { // Sub-paths? Unexpected any_non_initdb = true; } else { - let object = timeline_objects.contents.as_ref().unwrap().first().unwrap(); - if object.key.as_ref().unwrap().ends_with("initdb.tar.zst") { + let object = timeline_objects.keys.first().unwrap(); + if object.key.get_path().as_str().ends_with("initdb.tar.zst") { tracing::info!("Timeline {timeline} contains only initdb.tar.zst"); } else { any_non_initdb = true; @@ -336,7 +331,8 @@ async fn find_garbage_inner( // Construct a stream of all timelines within active tenants let active_tenants = tokio_stream::iter(active_tenants.iter().map(Ok)); - let timelines = active_tenants.map_ok(|t| stream_tenant_timelines(&s3_client, &target, *t)); + let timelines = + active_tenants.map_ok(|t| stream_tenant_timelines_generic(&remote_client, &target, *t)); let timelines = timelines.try_buffer_unordered(S3_CONCURRENCY); let timelines = timelines.try_flatten(); diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index e0f154def3..152319b731 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -427,6 +427,7 @@ async fn list_objects_with_retries( Err(anyhow!("unreachable unless MAX_RETRIES==0")) } +/// Listing possibly large amounts of keys in a streaming fashion. fn stream_objects_with_retries<'a>( storage_client: &'a GenericRemoteStorage, listing_mode: ListingMode, @@ -465,6 +466,45 @@ fn stream_objects_with_retries<'a>( } } +/// If you want to list a bounded amount of prefixes or keys. For larger numbers of keys/prefixes, +/// use [`stream_objects_with_retries`] instead. +async fn list_objects_with_retries_generic( + remote_client: &GenericRemoteStorage, + listing_mode: ListingMode, + s3_target: &S3Target, +) -> anyhow::Result { + let cancel = CancellationToken::new(); + let prefix_str = &s3_target + .prefix_in_bucket + .strip_prefix("/") + .unwrap_or(&s3_target.prefix_in_bucket); + let prefix = RemotePath::from_string(prefix_str)?; + for trial in 0..MAX_RETRIES { + match remote_client + .list(Some(&prefix), listing_mode, None, &cancel) + .await + { + Ok(response) => return Ok(response), + Err(e) => { + if trial == MAX_RETRIES - 1 { + return Err(e) + .with_context(|| format!("Failed to list objects {MAX_RETRIES} times")); + } + error!( + "list_objects_v2 query failed: bucket_name={}, prefix={}, delimiter={}, error={}", + s3_target.bucket_name, + s3_target.prefix_in_bucket, + s3_target.delimiter, + DisplayErrorContext(e), + ); + let backoff_time = 1 << trial.max(5); + tokio::time::sleep(Duration::from_secs(backoff_time)).await; + } + } + } + panic!("MAX_RETRIES is not allowed to be 0"); +} + async fn download_object_with_retries( s3_client: &Client, bucket_name: &str, diff --git a/storage_scrubber/src/metadata_stream.rs b/storage_scrubber/src/metadata_stream.rs index 91dba3c992..c702c0c312 100644 --- a/storage_scrubber/src/metadata_stream.rs +++ b/storage_scrubber/src/metadata_stream.rs @@ -189,6 +189,63 @@ pub async fn stream_tenant_timelines<'a>( }) } +/// Given a `TenantShardId`, output a stream of the timelines within that tenant, discovered +/// using a listing. The listing is done before the stream is built, so that this +/// function can be used to generate concurrency on a stream using buffer_unordered. +pub async fn stream_tenant_timelines_generic<'a>( + remote_client: &'a GenericRemoteStorage, + target: &'a RootTarget, + tenant: TenantShardId, +) -> anyhow::Result> + 'a> { + let mut timeline_ids: Vec> = Vec::new(); + let timelines_target = target.timelines_root(&tenant); + + let mut objects_stream = std::pin::pin!(stream_objects_with_retries( + remote_client, + ListingMode::WithDelimiter, + &timelines_target + )); + loop { + tracing::debug!("Listing in {tenant}"); + let fetch_response = match objects_stream.next().await { + None => break, + Some(Err(e)) => { + timeline_ids.push(Err(e)); + break; + } + Some(Ok(r)) => r, + }; + + let new_entry_ids = fetch_response + .prefixes + .iter() + .filter_map(|prefix| -> Option<&str> { + prefix + .get_path() + .as_str() + .strip_prefix(&timelines_target.prefix_in_bucket)? + .strip_suffix('/') + }) + .map(|entry_id_str| { + entry_id_str + .parse::() + .with_context(|| format!("Incorrect entry id str: {entry_id_str}")) + }); + + for i in new_entry_ids { + timeline_ids.push(i); + } + } + + tracing::debug!("Yielding for {}", tenant); + Ok(stream! { + for i in timeline_ids { + let id = i?; + yield Ok(TenantShardTimelineId::new(tenant, id)); + } + }) +} + pub(crate) fn stream_listing<'a>( s3_client: &'a Client, target: &'a S3Target, From 8adc4031d0558d4c7390aacfeba9d1f91f0e41df Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Wed, 31 Jul 2024 19:47:59 +0100 Subject: [PATCH 23/37] CI(create-test-report): fix missing benchmark results in Allure report (#8540) ## Problem In https://github.com/neondatabase/neon/pull/8241 I've accidentally removed `create-test-report` dependency on `benchmarks` job ## Summary of changes - Run `create-test-report` after `benchmarks` job --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 50006dd3d4..c7ae2aedd4 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -309,7 +309,7 @@ jobs: SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} create-test-report: - needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image ] + needs: [ check-permissions, build-and-test-locally, coverage-report, build-build-tools-image, benchmarks ] if: ${{ !cancelled() && contains(fromJSON('["skipped", "success"]'), needs.check-permissions.result) }} outputs: report-url: ${{ steps.create-allure-report.outputs.report-url }} From 02e8fd0b527dbefb9e8fd7528d9f4cbff56e6cf0 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Wed, 31 Jul 2024 17:55:19 -0400 Subject: [PATCH 24/37] test(pageserver): add test_gc_feedback_with_snapshots (#8474) should be working after https://github.com/neondatabase/neon/pull/8328 gets merged. Part of https://github.com/neondatabase/neon/issues/8002 adds a new perf benchmark case that ensures garbages can be collected with branches --------- Signed-off-by: Alex Chi Z --- scripts/benchmark_durations.py | 1 + test_runner/performance/test_gc_feedback.py | 54 +++++++++++++++------ 2 files changed, 40 insertions(+), 15 deletions(-) diff --git a/scripts/benchmark_durations.py b/scripts/benchmark_durations.py index 01f34a1b96..4ca433679a 100755 --- a/scripts/benchmark_durations.py +++ b/scripts/benchmark_durations.py @@ -67,6 +67,7 @@ FALLBACK_DURATION = { "test_runner/performance/test_copy.py::test_copy[neon]": 13.817, "test_runner/performance/test_copy.py::test_copy[vanilla]": 11.736, "test_runner/performance/test_gc_feedback.py::test_gc_feedback": 575.735, + "test_runner/performance/test_gc_feedback.py::test_gc_feedback_with_snapshots": 575.735, "test_runner/performance/test_gist_build.py::test_gist_buffering_build[neon]": 14.868, "test_runner/performance/test_gist_build.py::test_gist_buffering_build[vanilla]": 14.393, "test_runner/performance/test_latency.py::test_measure_read_latency_heavy_write_workload[neon-1]": 20.588, diff --git a/test_runner/performance/test_gc_feedback.py b/test_runner/performance/test_gc_feedback.py index 4c326111c2..9861259c16 100644 --- a/test_runner/performance/test_gc_feedback.py +++ b/test_runner/performance/test_gc_feedback.py @@ -6,21 +6,8 @@ from fixtures.log_helper import log from fixtures.neon_fixtures import NeonEnvBuilder -@pytest.mark.timeout(10000) -def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker): - """ - Test that GC is able to collect all old layers even if them are forming - "stairs" and there are not three delta layers since last image layer. - - Information about image layers needed to collect old layers should - be propagated by GC to compaction task which should take in in account - when make a decision which new image layers needs to be created. - - NB: this test demonstrates the problem. The source tree contained the - `gc_feedback` mechanism for about 9 months, but, there were problems - with it and it wasn't enabled at runtime. - This PR removed the code: https://github.com/neondatabase/neon/pull/6863 - """ +def gc_feedback_impl(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker, mode: str): + assert mode == "normal" or mode == "with_snapshots" env = neon_env_builder.init_start() client = env.pageserver.http_client() @@ -74,6 +61,9 @@ def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma physical_size = client.timeline_detail(tenant_id, timeline_id)["current_physical_size"] log.info(f"Physical storage size {physical_size}") + if mode == "with_snapshots": + if step == n_steps / 2: + env.neon_cli.create_branch("child") max_num_of_deltas_above_image = 0 max_total_num_of_deltas = 0 @@ -149,3 +139,37 @@ def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchma log.info(f"Writing layer map to {layer_map_path}") with layer_map_path.open("w") as f: f.write(json.dumps(client.timeline_layer_map_info(tenant_id, timeline_id))) + + +@pytest.mark.timeout(10000) +def test_gc_feedback(neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker): + """ + Test that GC is able to collect all old layers even if them are forming + "stairs" and there are not three delta layers since last image layer. + + Information about image layers needed to collect old layers should + be propagated by GC to compaction task which should take in in account + when make a decision which new image layers needs to be created. + + NB: this test demonstrates the problem. The source tree contained the + `gc_feedback` mechanism for about 9 months, but, there were problems + with it and it wasn't enabled at runtime. + This PR removed the code: https://github.com/neondatabase/neon/pull/6863 + + And the bottom-most GC-compaction epic resolves the problem. + https://github.com/neondatabase/neon/issues/8002 + """ + gc_feedback_impl(neon_env_builder, zenbenchmark, "normal") + + +@pytest.mark.timeout(10000) +def test_gc_feedback_with_snapshots( + neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker +): + """ + Compared with `test_gc_feedback`, we create a branch without written data (=snapshot) in the middle + of the benchmark, and the bottom-most compaction should collect as much garbage as possible below the GC + horizon. Ideally, there should be images (in an image layer) covering the full range at the branch point, + and images covering the full key range (in a delta layer) at the GC horizon. + """ + gc_feedback_impl(neon_env_builder, zenbenchmark, "with_snapshots") From 1e6a1ac9fa4a1cfbbffdbe93f69f45fd666f3c69 Mon Sep 17 00:00:00 2001 From: Christian Schwarz Date: Thu, 1 Aug 2024 07:57:09 +0200 Subject: [PATCH 25/37] pageserver: shutdown all walredo managers 8s into shutdown (#8572) # Motivation The working theory for hung systemd during PS deploy (https://github.com/neondatabase/cloud/issues/11387) is that leftover walredo processes trigger a race condition. In https://github.com/neondatabase/neon/pull/8150 I arranged that a clean Tenant shutdown does actually kill its walredo processes. But many prod machines don't manage to shut down all their tenants until the 10s systemd timeout hits and, presumably, triggers the race condition in systemd / the Linux kernel that causes the frozen systemd # Solution This PR bolts on a rather ugly mechanism to shut down tenant managers out of order 8s after we've received the SIGTERM from systemd. # Changes - add a global registry of `Weak` - add a special thread spawned during `shutdown_pageserver` that sleeps for 8s, then shuts down all redo managers in the registry and prevents new redo managers from being created - propagate the new failure mode of tenant spawning throughout the code base - make sure shut down tenant manager results in PageReconstructError::Cancelled so that if Timeline::get calls come in after the shutdown, they do the right thing --- pageserver/src/lib.rs | 83 ++++++++++++++++++++++++++++++- pageserver/src/tenant.rs | 82 ++++++++++++++++++++++++------ pageserver/src/tenant/mgr.rs | 38 ++++++++------ pageserver/src/tenant/timeline.rs | 22 +++++--- pageserver/src/walredo.rs | 29 ++++++++--- 5 files changed, 206 insertions(+), 48 deletions(-) diff --git a/pageserver/src/lib.rs b/pageserver/src/lib.rs index f729cad3c3..5aee13cfc6 100644 --- a/pageserver/src/lib.rs +++ b/pageserver/src/lib.rs @@ -12,6 +12,8 @@ pub mod disk_usage_eviction_task; pub mod http; pub mod import_datadir; pub mod l0_flush; + +use futures::{stream::FuturesUnordered, StreamExt}; pub use pageserver_api::keyspace; use tokio_util::sync::CancellationToken; pub mod aux_file; @@ -36,7 +38,7 @@ use tenant::{ mgr::{BackgroundPurges, TenantManager}, secondary, }; -use tracing::info; +use tracing::{info, info_span}; /// Current storage format version /// @@ -85,6 +87,79 @@ pub async fn shutdown_pageserver( exit_code: i32, ) { use std::time::Duration; + + // If the orderly shutdown below takes too long, we still want to make + // sure that all walredo processes are killed and wait()ed on by us, not systemd. + // + // (Leftover walredo processes are the hypothesized trigger for the systemd freezes + // that we keep seeing in prod => https://github.com/neondatabase/cloud/issues/11387. + // + // We use a thread instead of a tokio task because the background runtime is likely busy + // with the final flushing / uploads. This activity here has priority, and due to lack + // of scheduling priority feature sin the tokio scheduler, using a separate thread is + // an effective priority booster. + let walredo_extraordinary_shutdown_thread_span = { + let span = info_span!(parent: None, "walredo_extraordinary_shutdown_thread"); + span.follows_from(tracing::Span::current()); + span + }; + let walredo_extraordinary_shutdown_thread_cancel = CancellationToken::new(); + let walredo_extraordinary_shutdown_thread = std::thread::spawn({ + let walredo_extraordinary_shutdown_thread_cancel = + walredo_extraordinary_shutdown_thread_cancel.clone(); + move || { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + let _entered = rt.enter(); + let _entered = walredo_extraordinary_shutdown_thread_span.enter(); + if let Ok(()) = rt.block_on(tokio::time::timeout( + Duration::from_secs(8), + walredo_extraordinary_shutdown_thread_cancel.cancelled(), + )) { + info!("cancellation requested"); + return; + } + let managers = tenant::WALREDO_MANAGERS + .lock() + .unwrap() + // prevents new walredo managers from being inserted + .take() + .expect("only we take()"); + // Use FuturesUnordered to get in queue early for each manager's + // heavier_once_cell semaphore wait list. + // Also, for idle tenants that for some reason haven't + // shut down yet, it's quite likely that we're not going + // to get Poll::Pending once. + let mut futs: FuturesUnordered<_> = managers + .into_iter() + .filter_map(|(_, mgr)| mgr.upgrade()) + .map(|mgr| async move { tokio::task::unconstrained(mgr.shutdown()).await }) + .collect(); + info!(count=%futs.len(), "built FuturesUnordered"); + let mut last_log_at = std::time::Instant::now(); + #[derive(Debug, Default)] + struct Results { + initiated: u64, + already: u64, + } + let mut results = Results::default(); + while let Some(we_initiated) = rt.block_on(futs.next()) { + if we_initiated { + results.initiated += 1; + } else { + results.already += 1; + } + if last_log_at.elapsed() > Duration::from_millis(100) { + info!(remaining=%futs.len(), ?results, "progress"); + last_log_at = std::time::Instant::now(); + } + } + info!(?results, "done"); + } + }); + // Shut down the libpq endpoint task. This prevents new connections from // being accepted. let remaining_connections = timed( @@ -160,6 +235,12 @@ pub async fn shutdown_pageserver( Duration::from_secs(1), ) .await; + + info!("cancel & join walredo_extraordinary_shutdown_thread"); + walredo_extraordinary_shutdown_thread_cancel.cancel(); + walredo_extraordinary_shutdown_thread.join().unwrap(); + info!("walredo_extraordinary_shutdown_thread done"); + info!("Shut down successfully completed"); std::process::exit(exit_code); } diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 5d0e963b4e..0f09241d22 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -33,6 +33,7 @@ use remote_storage::GenericRemoteStorage; use remote_storage::TimeoutOrCancel; use std::collections::BTreeMap; use std::fmt; +use std::sync::Weak; use std::time::SystemTime; use storage_broker::BrokerClientChannel; use tokio::io::BufReader; @@ -312,14 +313,66 @@ impl std::fmt::Debug for Tenant { } pub(crate) enum WalRedoManager { - Prod(PostgresRedoManager), + Prod(WalredoManagerId, PostgresRedoManager), #[cfg(test)] Test(harness::TestRedoManager), } -impl From for WalRedoManager { - fn from(mgr: PostgresRedoManager) -> Self { - Self::Prod(mgr) +#[derive(thiserror::Error, Debug)] +#[error("pageserver is shutting down")] +pub(crate) struct GlobalShutDown; + +impl WalRedoManager { + pub(crate) fn new(mgr: PostgresRedoManager) -> Result, GlobalShutDown> { + let id = WalredoManagerId::next(); + let arc = Arc::new(Self::Prod(id, mgr)); + let mut guard = WALREDO_MANAGERS.lock().unwrap(); + match &mut *guard { + Some(map) => { + map.insert(id, Arc::downgrade(&arc)); + Ok(arc) + } + None => Err(GlobalShutDown), + } + } +} + +impl Drop for WalRedoManager { + fn drop(&mut self) { + match self { + Self::Prod(id, _) => { + let mut guard = WALREDO_MANAGERS.lock().unwrap(); + if let Some(map) = &mut *guard { + map.remove(id).expect("new() registers, drop() unregisters"); + } + } + #[cfg(test)] + Self::Test(_) => { + // Not applicable to test redo manager + } + } + } +} + +/// Global registry of all walredo managers so that [`crate::shutdown_pageserver`] can shut down +/// the walredo processes outside of the regular order. +/// +/// This is necessary to work around a systemd bug where it freezes if there are +/// walredo processes left => +#[allow(clippy::type_complexity)] +pub(crate) static WALREDO_MANAGERS: once_cell::sync::Lazy< + Mutex>>>, +> = once_cell::sync::Lazy::new(|| Mutex::new(Some(HashMap::new()))); +#[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)] +pub(crate) struct WalredoManagerId(u64); +impl WalredoManagerId { + pub fn next() -> Self { + static NEXT: std::sync::atomic::AtomicU64 = std::sync::atomic::AtomicU64::new(1); + let id = NEXT.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if id == 0 { + panic!("WalredoManagerId::new() returned 0, indicating wraparound, risking it's no longer unique"); + } + Self(id) } } @@ -331,19 +384,20 @@ impl From for WalRedoManager { } impl WalRedoManager { - pub(crate) async fn shutdown(&self) { + pub(crate) async fn shutdown(&self) -> bool { match self { - Self::Prod(mgr) => mgr.shutdown().await, + Self::Prod(_, mgr) => mgr.shutdown().await, #[cfg(test)] Self::Test(_) => { // Not applicable to test redo manager + true } } } pub(crate) fn maybe_quiesce(&self, idle_timeout: Duration) { match self { - Self::Prod(mgr) => mgr.maybe_quiesce(idle_timeout), + Self::Prod(_, mgr) => mgr.maybe_quiesce(idle_timeout), #[cfg(test)] Self::Test(_) => { // Not applicable to test redo manager @@ -363,7 +417,7 @@ impl WalRedoManager { pg_version: u32, ) -> Result { match self { - Self::Prod(mgr) => { + Self::Prod(_, mgr) => { mgr.request_redo(key, lsn, base_img, records, pg_version) .await } @@ -377,7 +431,7 @@ impl WalRedoManager { pub(crate) fn status(&self) -> Option { match self { - WalRedoManager::Prod(m) => Some(m.status()), + WalRedoManager::Prod(_, m) => Some(m.status()), #[cfg(test)] WalRedoManager::Test(_) => None, } @@ -677,11 +731,9 @@ impl Tenant { init_order: Option, mode: SpawnMode, ctx: &RequestContext, - ) -> Arc { - let wal_redo_manager = Arc::new(WalRedoManager::from(PostgresRedoManager::new( - conf, - tenant_shard_id, - ))); + ) -> Result, GlobalShutDown> { + let wal_redo_manager = + WalRedoManager::new(PostgresRedoManager::new(conf, tenant_shard_id))?; let TenantSharedResources { broker_client, @@ -880,7 +932,7 @@ impl Tenant { } .instrument(tracing::info_span!(parent: None, "attach", tenant_id=%tenant_shard_id.tenant_id, shard_id=%tenant_shard_id.shard_slug(), gen=?generation)), ); - tenant + Ok(tenant) } #[instrument(skip_all)] diff --git a/pageserver/src/tenant/mgr.rs b/pageserver/src/tenant/mgr.rs index 58f8990892..b5568d37b5 100644 --- a/pageserver/src/tenant/mgr.rs +++ b/pageserver/src/tenant/mgr.rs @@ -55,7 +55,7 @@ use utils::id::{TenantId, TimelineId}; use super::remote_timeline_client::remote_tenant_path; use super::secondary::SecondaryTenant; use super::timeline::detach_ancestor::PreparedTimelineDetach; -use super::TenantSharedResources; +use super::{GlobalShutDown, TenantSharedResources}; /// For a tenant that appears in TenantsMap, it may either be /// - `Attached`: has a full Tenant object, is elegible to service @@ -665,17 +665,20 @@ pub async fn init_tenant_mgr( let tenant_dir_path = conf.tenant_path(&tenant_shard_id); let shard_identity = location_conf.shard; let slot = match location_conf.mode { - LocationMode::Attached(attached_conf) => TenantSlot::Attached(tenant_spawn( - conf, - tenant_shard_id, - &tenant_dir_path, - resources.clone(), - AttachedTenantConf::new(location_conf.tenant_conf, attached_conf), - shard_identity, - Some(init_order.clone()), - SpawnMode::Lazy, - &ctx, - )), + LocationMode::Attached(attached_conf) => TenantSlot::Attached( + tenant_spawn( + conf, + tenant_shard_id, + &tenant_dir_path, + resources.clone(), + AttachedTenantConf::new(location_conf.tenant_conf, attached_conf), + shard_identity, + Some(init_order.clone()), + SpawnMode::Lazy, + &ctx, + ) + .expect("global shutdown during init_tenant_mgr cannot happen"), + ), LocationMode::Secondary(secondary_conf) => { info!( tenant_id = %tenant_shard_id.tenant_id, @@ -723,7 +726,7 @@ fn tenant_spawn( init_order: Option, mode: SpawnMode, ctx: &RequestContext, -) -> Arc { +) -> Result, GlobalShutDown> { // All these conditions should have been satisfied by our caller: the tenant dir exists, is a well formed // path, and contains a configuration file. Assertions that do synchronous I/O are limited to debug mode // to avoid impacting prod runtime performance. @@ -1190,7 +1193,10 @@ impl TenantManager { None, spawn_mode, ctx, - ); + ) + .map_err(|_: GlobalShutDown| { + UpsertLocationError::Unavailable(TenantMapError::ShuttingDown) + })?; TenantSlot::Attached(tenant) } @@ -1311,7 +1317,7 @@ impl TenantManager { None, SpawnMode::Eager, ctx, - ); + )?; slot_guard.upsert(TenantSlot::Attached(tenant))?; @@ -2045,7 +2051,7 @@ impl TenantManager { None, SpawnMode::Eager, ctx, - ); + )?; slot_guard.upsert(TenantSlot::Attached(tenant))?; diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index ecae443079..3a7353c138 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -76,6 +76,7 @@ use crate::{ metadata::TimelineMetadata, storage_layer::PersistentLayerDesc, }, + walredo, }; use crate::{ context::{DownloadBehavior, RequestContext}, @@ -1000,7 +1001,10 @@ impl Timeline { .for_get_kind(GetKind::Singular) .observe(elapsed.as_secs_f64()); - if cfg!(feature = "testing") && res.is_err() { + if cfg!(feature = "testing") + && res.is_err() + && !matches!(res, Err(PageReconstructError::Cancelled)) + { // it can only be walredo issue use std::fmt::Write; @@ -5466,20 +5470,22 @@ impl Timeline { } else { trace!("found {} WAL records that will init the page for {} at {}, performing WAL redo", data.records.len(), key, request_lsn); }; - - let img = match self + let res = self .walredo_mgr .as_ref() .context("timeline has no walredo manager") .map_err(PageReconstructError::WalRedo)? .request_redo(key, request_lsn, data.img, data.records, self.pg_version) - .await - .context("reconstruct a page image") - { + .await; + let img = match res { Ok(img) => img, - Err(e) => return Err(PageReconstructError::WalRedo(e)), + Err(walredo::Error::Cancelled) => return Err(PageReconstructError::Cancelled), + Err(walredo::Error::Other(e)) => { + return Err(PageReconstructError::WalRedo( + e.context("reconstruct a page image"), + )) + } }; - Ok(img) } } diff --git a/pageserver/src/walredo.rs b/pageserver/src/walredo.rs index 5095beefd7..770081b3b4 100644 --- a/pageserver/src/walredo.rs +++ b/pageserver/src/walredo.rs @@ -241,6 +241,9 @@ impl PostgresRedoManager { /// Shut down the WAL redo manager. /// + /// Returns `true` if this call was the one that initiated shutdown. + /// `true` may be observed by no caller if the first caller stops polling. + /// /// After this future completes /// - no redo process is running /// - no new redo process will be spawned @@ -250,22 +253,32 @@ impl PostgresRedoManager { /// # Cancel-Safety /// /// This method is cancellation-safe. - pub async fn shutdown(&self) { + pub async fn shutdown(&self) -> bool { // prevent new processes from being spawned - let permit = match self.redo_process.get_or_init_detached().await { + let maybe_permit = match self.redo_process.get_or_init_detached().await { Ok(guard) => { - let (proc, permit) = guard.take_and_deinit(); - drop(proc); // this just drops the Arc, its refcount may not be zero yet - permit + if matches!(&*guard, ProcessOnceCell::ManagerShutDown) { + None + } else { + let (proc, permit) = guard.take_and_deinit(); + drop(proc); // this just drops the Arc, its refcount may not be zero yet + Some(permit) + } } - Err(permit) => permit, + Err(permit) => Some(permit), + }; + let it_was_us = if let Some(permit) = maybe_permit { + self.redo_process + .set(ProcessOnceCell::ManagerShutDown, permit); + true + } else { + false }; - self.redo_process - .set(ProcessOnceCell::ManagerShutDown, permit); // wait for ongoing requests to drain and the refcounts of all Arc that // we ever launched to drop to zero, which when it happens synchronously kill()s & wait()s // for the underlying process. self.launched_processes.close().await; + it_was_us } /// This type doesn't have its own background task to check for idleness: we From a22361b57b3c831cf17a176c573f11b1debcbc69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Thu, 1 Aug 2024 10:22:21 +0200 Subject: [PATCH 26/37] Reduce linux-raw-sys duplication (#8577) Before, we had four versions of linux-raw-sys in our dependency graph: ``` linux-raw-sys@0.1.4 linux-raw-sys@0.3.8 linux-raw-sys@0.4.13 linux-raw-sys@0.6.4 ``` now it's only two: ``` linux-raw-sys@0.4.13 linux-raw-sys@0.6.4 ``` The changes in this PR are minimal. In order to get to its state one only has to update procfs in Cargo.toml to 0.16 and do `cargo update -p tempfile -p is-terminal -p prometheus`. --- Cargo.lock | 128 +++++++++-------------------------------------------- Cargo.toml | 2 +- 2 files changed, 21 insertions(+), 109 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2186d55e9c..e2e9ca3ed8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2710,17 +2710,6 @@ version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" -[[package]] -name = "io-lifetimes" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" -dependencies = [ - "hermit-abi", - "libc", - "windows-sys 0.48.0", -] - [[package]] name = "io-uring" version = "0.6.2" @@ -2739,14 +2728,13 @@ checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" [[package]] name = "is-terminal" -version = "0.4.7" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adcf93614601c8129ddf72e2d5633df827ba6551541c6d8c59520a371475be1f" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" dependencies = [ "hermit-abi", - "io-lifetimes", - "rustix 0.37.25", - "windows-sys 0.48.0", + "libc", + "windows-sys 0.52.0", ] [[package]] @@ -2872,18 +2860,6 @@ version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" -[[package]] -name = "linux-raw-sys" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f051f77a7c8e6957c0696eac88f26b0117e54f52d3fc682ab19397a8812846a4" - -[[package]] -name = "linux-raw-sys" -version = "0.3.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" - [[package]] name = "linux-raw-sys" version = "0.4.13" @@ -3001,7 +2977,7 @@ checksum = "7c4b80445aeb08e832d87bf1830049a924cdc1d6b7ef40b6b9b365bff17bf8ec" dependencies = [ "libc", "measured", - "procfs 0.16.0", + "procfs", ] [[package]] @@ -3046,7 +3022,7 @@ dependencies = [ "measured", "measured-process", "once_cell", - "procfs 0.14.2", + "procfs", "prometheus", "rand 0.8.5", "rand_distr", @@ -3593,7 +3569,7 @@ dependencies = [ "postgres_connection", "postgres_ffi", "pq_proto", - "procfs 0.14.2", + "procfs", "rand 0.8.5", "regex", "remote_storage", @@ -4139,21 +4115,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "procfs" -version = "0.14.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1de8dacb0873f77e6aefc6d71e044761fcc68060290f5b1089fcdf84626bb69" -dependencies = [ - "bitflags 1.3.2", - "byteorder", - "chrono", - "flate2", - "hex", - "lazy_static", - "rustix 0.36.16", -] - [[package]] name = "procfs" version = "0.16.0" @@ -4161,10 +4122,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "731e0d9356b0c25f16f33b5be79b1c57b562f141ebfcdb0ad8ac2c13a24293b4" dependencies = [ "bitflags 2.4.1", + "chrono", + "flate2", "hex", "lazy_static", "procfs-core", - "rustix 0.38.28", + "rustix", ] [[package]] @@ -4174,14 +4137,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d3554923a69f4ce04c4a754260c338f505ce22642d3830e049a399fc2059a29" dependencies = [ "bitflags 2.4.1", + "chrono", "hex", ] [[package]] name = "prometheus" -version = "0.13.3" +version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "449811d15fbdf5ceb5c1144416066429cf82316e2ec8ce0c1f6f8a02e7bbcf8c" +checksum = "3d33c28a30771f7f96db69893f78b857f7450d7e0237e9c8fc6427a81bae7ed1" dependencies = [ "cfg-if", "fnv", @@ -4189,7 +4153,7 @@ dependencies = [ "libc", "memchr", "parking_lot 0.12.1", - "procfs 0.14.2", + "procfs", "thiserror", ] @@ -4943,34 +4907,6 @@ dependencies = [ "nom", ] -[[package]] -name = "rustix" -version = "0.36.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6da3636faa25820d8648e0e31c5d519bbb01f72fdf57131f0f5f7da5fed36eab" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.1.4", - "windows-sys 0.45.0", -] - -[[package]] -name = "rustix" -version = "0.37.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4eb579851244c2c03e7c24f501c3432bed80b8f720af1d6e5b0e0f01555a035" -dependencies = [ - "bitflags 1.3.2", - "errno", - "io-lifetimes", - "libc", - "linux-raw-sys 0.3.8", - "windows-sys 0.48.0", -] - [[package]] name = "rustix" version = "0.38.28" @@ -5973,15 +5909,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.5.0" +version = "3.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9fbec84f381d5795b08656e4912bec604d162bff9291d6189a78f4c8ab87998" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" dependencies = [ "cfg-if", - "fastrand 1.9.0", - "redox_syscall 0.3.5", - "rustix 0.37.25", - "windows-sys 0.45.0", + "fastrand 2.0.0", + "redox_syscall 0.4.1", + "rustix", + "windows-sys 0.52.0", ] [[package]] @@ -7178,15 +7114,6 @@ dependencies = [ "windows_x86_64_msvc 0.42.2", ] -[[package]] -name = "windows-sys" -version = "0.45.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75283be5efb2831d37ea142365f009c02ec203cd29a3ebecbc093d52315b66d0" -dependencies = [ - "windows-targets 0.42.2", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -7205,21 +7132,6 @@ dependencies = [ "windows-targets 0.52.4", ] -[[package]] -name = "windows-targets" -version = "0.42.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e5180c00cd44c9b1c88adb3693291f1cd93605ded80c250a75d472756b4d071" -dependencies = [ - "windows_aarch64_gnullvm 0.42.2", - "windows_aarch64_msvc 0.42.2", - "windows_i686_gnu 0.42.2", - "windows_i686_msvc 0.42.2", - "windows_x86_64_gnu 0.42.2", - "windows_x86_64_gnullvm 0.42.2", - "windows_x86_64_msvc 0.42.2", -] - [[package]] name = "windows-targets" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 7749378114..af1c1dfc82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -126,7 +126,7 @@ parquet = { version = "51.0.0", default-features = false, features = ["zstd"] } parquet_derive = "51.0.0" pbkdf2 = { version = "0.12.1", features = ["simple", "std"] } pin-project-lite = "0.2" -procfs = "0.14" +procfs = "0.16" prometheus = {version = "0.13", default-features=false, features = ["process"]} # removes protobuf dependency prost = "0.11" rand = "0.8" From f51dc6a44e3d2ca9fb081f6fc806a80d33bb2e49 Mon Sep 17 00:00:00 2001 From: John Spray Date: Thu, 1 Aug 2024 10:25:35 +0100 Subject: [PATCH 27/37] pageserver: add layer visibility calculation (#8511) ## Problem We recently added a "visibility" state to layers, but nothing initializes it. Part of: - #8398 ## Summary of changes - Add a dependency on `range-set-blaze`, which is used as a fast incrementally updated alternative to KeySpace. We could also use this to replace the internals of KeySpaceRandomAccum if we wanted to. Writing a type that does this kind of "BtreeMap & merge overlapping entries" thing isn't super complicated, but no reason to write this ourselves when there's a third party impl available. - Add a function to layermap to calculate visibilities for each layer - Add a function to Timeline to call into layermap and then apply these visibilities to the Layer objects. - Invoke the calculation during startup, after image layer creations, and when removing branches. Branch removal and image layer creation are the two ways that a layer can go from Visible to Covered. - Add unit test & benchmark for the visibility calculation - Expose `pageserver_visible_physical_size` metric, which should always be <= `pageserver_remote_physical_size`. - This metric will feed into the /v1/utilization endpoint later: the visible size indicates how much space we would like to use on this pageserver for this tenant. - When `pageserver_visible_physical_size` is greater than `pageserver_resident_physical_size`, this is a sign that the tenant has long-idle branches, which result in layers that are visible in principle, but not used in practice. This does not keep visibility hints up to date in all cases: particularly, when creating a child timeline, any previously covered layers will not get marked Visible until they are accessed. Updates after image layer creation could be implemented as more of a special case, but this would require more new code: the existing depth calculation code doesn't maintain+yield the list of deltas that would be covered by an image layer. ## Performance This operation is done rarely (at startup and at timeline deletion), so needs to be efficient but not ultra-fast. There is a new `visibility` bench that measures runtime for a synthetic 100k layers case (`sequential`) and a real layer map (`real_map`) with ~26k layers. The benchmark shows runtimes of single digit milliseconds (on a ryzen 7950). This confirms that the runtime shouldn't be a problem at startup (as we already incur S3-level latencies there), but that it's slow enough that we definitely shouldn't call it more often than necessary, and it may be worthwhile to optimize further later (things like: when removing a branch, only bother scanning layers below the branchpoint) ``` visibility/sequential time: [4.5087 ms 4.5894 ms 4.6775 ms] change: [+2.0826% +3.9097% +5.8995%] (p = 0.00 < 0.05) Performance has regressed. Found 24 outliers among 100 measurements (24.00%) 2 (2.00%) high mild 22 (22.00%) high severe min: 0/1696070, max: 93/1C0887F0 visibility/real_map time: [7.0796 ms 7.0832 ms 7.0871 ms] change: [+0.3900% +0.4505% +0.5164%] (p = 0.00 < 0.05) Change within noise threshold. Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) high mild 1 (1.00%) high severe min: 0/1696070, max: 93/1C0887F0 visibility/real_map_many_branches time: [4.5285 ms 4.5355 ms 4.5434 ms] change: [-1.0012% -0.8004% -0.5969%] (p = 0.00 < 0.05) Change within noise threshold. ``` --- Cargo.lock | 56 ++- pageserver/Cargo.toml | 1 + pageserver/benches/bench_layer_map.rs | 78 ++- pageserver/src/metrics.rs | 15 + pageserver/src/tenant.rs | 2 +- pageserver/src/tenant/layer_map.rs | 474 +++++++++++++++++- .../layer_map/historic_layer_coverage.rs | 4 + pageserver/src/tenant/storage_layer.rs | 41 +- pageserver/src/tenant/storage_layer/layer.rs | 53 +- pageserver/src/tenant/timeline.rs | 28 +- pageserver/src/tenant/timeline/compaction.rs | 39 ++ pageserver/src/tenant/timeline/delete.rs | 9 +- .../indices/mixed_workload/README.md | 7 + .../indices/mixed_workload/index_part.json | 1 + test_runner/fixtures/metrics.py | 1 + 15 files changed, 729 insertions(+), 80 deletions(-) create mode 100644 pageserver/test_data/indices/mixed_workload/README.md create mode 100644 pageserver/test_data/indices/mixed_workload/index_part.json diff --git a/Cargo.lock b/Cargo.lock index e2e9ca3ed8..dc4f0c7b81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1418,7 +1418,7 @@ dependencies = [ "clap", "criterion-plot", "is-terminal", - "itertools", + "itertools 0.10.5", "num-traits", "once_cell", "oorandom", @@ -1439,7 +1439,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" dependencies = [ "cast", - "itertools", + "itertools 0.10.5", ] [[package]] @@ -2134,6 +2134,12 @@ dependencies = [ "slab", ] +[[package]] +name = "gen_ops" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "304de19db7028420975a296ab0fcbbc8e69438c4ed254a1e41e2a7f37d5f0e0a" + [[package]] name = "generic-array" version = "0.14.7" @@ -2746,6 +2752,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -3551,7 +3566,7 @@ dependencies = [ "humantime", "humantime-serde", "hyper 0.14.26", - "itertools", + "itertools 0.10.5", "leaky-bucket", "md5", "metrics", @@ -3571,6 +3586,7 @@ dependencies = [ "pq_proto", "procfs", "rand 0.8.5", + "range-set-blaze", "regex", "remote_storage", "reqwest 0.12.4", @@ -3621,7 +3637,7 @@ dependencies = [ "hex", "humantime", "humantime-serde", - "itertools", + "itertools 0.10.5", "postgres_ffi", "rand 0.8.5", "serde", @@ -3679,7 +3695,7 @@ dependencies = [ "hex-literal", "humantime", "humantime-serde", - "itertools", + "itertools 0.10.5", "metrics", "once_cell", "pageserver_api", @@ -4011,7 +4027,7 @@ name = "postgres_connection" version = "0.1.0" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "once_cell", "postgres", "tokio-postgres", @@ -4069,7 +4085,7 @@ version = "0.1.0" dependencies = [ "byteorder", "bytes", - "itertools", + "itertools 0.10.5", "pin-project-lite", "postgres-protocol", "rand 0.8.5", @@ -4175,7 +4191,7 @@ checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", "heck 0.4.1", - "itertools", + "itertools 0.10.5", "lazy_static", "log", "multimap", @@ -4196,7 +4212,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5d2d8d10f3c6ded6da8b05b5fb3b8a5082514344d56c9f871412d29b4e075b4" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 1.0.109", @@ -4253,7 +4269,7 @@ dependencies = [ "hyper-util", "indexmap 2.0.1", "ipnet", - "itertools", + "itertools 0.10.5", "lasso", "md5", "measured", @@ -4429,6 +4445,18 @@ dependencies = [ "rand_core 0.5.1", ] +[[package]] +name = "range-set-blaze" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8421b5d459262eabbe49048d362897ff3e3830b44eac6cfe341d6acb2f0f13d2" +dependencies = [ + "gen_ops", + "itertools 0.12.1", + "num-integer", + "num-traits", +] + [[package]] name = "rayon" version = "1.7.0" @@ -4597,7 +4625,7 @@ dependencies = [ "humantime", "humantime-serde", "hyper 0.14.26", - "itertools", + "itertools 0.10.5", "metrics", "once_cell", "pin-project-lite", @@ -5666,7 +5694,7 @@ dependencies = [ "hex", "humantime", "hyper 0.14.26", - "itertools", + "itertools 0.10.5", "lasso", "measured", "metrics", @@ -5732,7 +5760,7 @@ dependencies = [ "futures-util", "hex", "humantime", - "itertools", + "itertools 0.10.5", "once_cell", "pageserver", "pageserver_api", @@ -7361,7 +7389,7 @@ dependencies = [ "hmac", "hyper 0.14.26", "indexmap 1.9.3", - "itertools", + "itertools 0.10.5", "libc", "log", "memchr", diff --git a/pageserver/Cargo.toml b/pageserver/Cargo.toml index 0d9343d643..43976250a4 100644 --- a/pageserver/Cargo.toml +++ b/pageserver/Cargo.toml @@ -49,6 +49,7 @@ postgres_backend.workspace = true postgres-protocol.workspace = true postgres-types.workspace = true rand.workspace = true +range-set-blaze = { version = "0.1.16", features = ["alloc"] } regex.workspace = true scopeguard.workspace = true serde.workspace = true diff --git a/pageserver/benches/bench_layer_map.rs b/pageserver/benches/bench_layer_map.rs index 1d02aa7709..1353e79f7c 100644 --- a/pageserver/benches/bench_layer_map.rs +++ b/pageserver/benches/bench_layer_map.rs @@ -1,3 +1,4 @@ +use criterion::measurement::WallTime; use pageserver::keyspace::{KeyPartitioning, KeySpace}; use pageserver::repository::Key; use pageserver::tenant::layer_map::LayerMap; @@ -15,7 +16,11 @@ use utils::id::{TenantId, TimelineId}; use utils::lsn::Lsn; -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, BenchmarkGroup, Criterion}; + +fn fixture_path(relative: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative) +} fn build_layer_map(filename_dump: PathBuf) -> LayerMap { let mut layer_map = LayerMap::default(); @@ -109,7 +114,7 @@ fn uniform_key_partitioning(layer_map: &LayerMap, _lsn: Lsn) -> KeyPartitioning // between each test run. fn bench_from_captest_env(c: &mut Criterion) { // TODO consider compressing this file - let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt")); + let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt")); let queries: Vec<(Key, Lsn)> = uniform_query_pattern(&layer_map); // Test with uniform query pattern @@ -139,7 +144,7 @@ fn bench_from_captest_env(c: &mut Criterion) { fn bench_from_real_project(c: &mut Criterion) { // Init layer map let now = Instant::now(); - let layer_map = build_layer_map(PathBuf::from("benches/odd-brook-layernames.txt")); + let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt")); println!("Finished layer map init in {:?}", now.elapsed()); // Choose uniformly distributed queries @@ -242,7 +247,72 @@ fn bench_sequential(c: &mut Criterion) { group.finish(); } +fn bench_visibility_with_map( + group: &mut BenchmarkGroup, + layer_map: LayerMap, + read_points: Vec, + bench_name: &str, +) { + group.bench_function(bench_name, |b| { + b.iter(|| black_box(layer_map.get_visibility(read_points.clone()))); + }); +} + +// Benchmark using synthetic data. Arrange image layers on stacked diagonal lines. +fn bench_visibility(c: &mut Criterion) { + let mut group = c.benchmark_group("visibility"); + { + // Init layer map. Create 100_000 layers arranged in 1000 diagonal lines. + let now = Instant::now(); + let mut layer_map = LayerMap::default(); + let mut updates = layer_map.batch_update(); + for i in 0..100_000 { + let i32 = (i as u32) % 100; + let zero = Key::from_hex("000000000000000000000000000000000000").unwrap(); + let layer = PersistentLayerDesc::new_img( + TenantShardId::unsharded(TenantId::generate()), + TimelineId::generate(), + zero.add(10 * i32)..zero.add(10 * i32 + 1), + Lsn(i), + 0, + ); + updates.insert_historic(layer); + } + updates.flush(); + println!("Finished layer map init in {:?}", now.elapsed()); + + let mut read_points = Vec::new(); + for i in (0..100_000).step_by(1000) { + read_points.push(Lsn(i)); + } + + bench_visibility_with_map(&mut group, layer_map, read_points, "sequential"); + } + + { + let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt")); + let read_points = vec![Lsn(0x1C760FA190)]; + bench_visibility_with_map(&mut group, layer_map, read_points, "real_map"); + + let layer_map = build_layer_map(fixture_path("benches/odd-brook-layernames.txt")); + let read_points = vec![ + Lsn(0x1C760FA190), + Lsn(0x000000931BEAD539), + Lsn(0x000000931BF63011), + Lsn(0x000000931B33AE68), + Lsn(0x00000038E67ABFA0), + Lsn(0x000000931B33AE68), + Lsn(0x000000914E3F38F0), + Lsn(0x000000931B33AE68), + ]; + bench_visibility_with_map(&mut group, layer_map, read_points, "real_map_many_branches"); + } + + group.finish(); +} + criterion_group!(group_1, bench_from_captest_env); criterion_group!(group_2, bench_from_real_project); criterion_group!(group_3, bench_sequential); -criterion_main!(group_1, group_2, group_3); +criterion_group!(group_4, bench_visibility); +criterion_main!(group_1, group_2, group_3, group_4); diff --git a/pageserver/src/metrics.rs b/pageserver/src/metrics.rs index ede6b41a75..cd2cd43f27 100644 --- a/pageserver/src/metrics.rs +++ b/pageserver/src/metrics.rs @@ -525,6 +525,15 @@ static RESIDENT_PHYSICAL_SIZE: Lazy = Lazy::new(|| { .expect("failed to define a metric") }); +static VISIBLE_PHYSICAL_SIZE: Lazy = Lazy::new(|| { + register_uint_gauge_vec!( + "pageserver_visible_physical_size", + "The size of the layer files present in the pageserver's filesystem.", + &["tenant_id", "shard_id", "timeline_id"] + ) + .expect("failed to define a metric") +}); + pub(crate) static RESIDENT_PHYSICAL_SIZE_GLOBAL: Lazy = Lazy::new(|| { register_uint_gauge!( "pageserver_resident_physical_size_global", @@ -2204,6 +2213,7 @@ pub(crate) struct TimelineMetrics { pub(crate) layer_count_delta: UIntGauge, pub standby_horizon_gauge: IntGauge, pub resident_physical_size_gauge: UIntGauge, + pub visible_physical_size_gauge: UIntGauge, /// copy of LayeredTimeline.current_logical_size pub current_logical_size_gauge: UIntGauge, pub aux_file_size_gauge: IntGauge, @@ -2326,6 +2336,9 @@ impl TimelineMetrics { let resident_physical_size_gauge = RESIDENT_PHYSICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) .unwrap(); + let visible_physical_size_gauge = VISIBLE_PHYSICAL_SIZE + .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) + .unwrap(); // TODO: we shouldn't expose this metric let current_logical_size_gauge = CURRENT_LOGICAL_SIZE .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id]) @@ -2380,6 +2393,7 @@ impl TimelineMetrics { layer_count_delta, standby_horizon_gauge, resident_physical_size_gauge, + visible_physical_size_gauge, current_logical_size_gauge, aux_file_size_gauge, directory_entries_count_gauge, @@ -2431,6 +2445,7 @@ impl TimelineMetrics { RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get()); let _ = RESIDENT_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); } + let _ = VISIBLE_PHYSICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); let _ = CURRENT_LOGICAL_SIZE.remove_label_values(&[tenant_id, shard_id, timeline_id]); if let Some(metric) = Lazy::get(&DIRECTORY_ENTRIES_COUNT) { let _ = metric.remove_label_values(&[tenant_id, shard_id, timeline_id]); diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index 0f09241d22..b9257dfbe8 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -1634,7 +1634,7 @@ impl Tenant { self: Arc, timeline_id: TimelineId, ) -> Result<(), DeleteTimelineError> { - DeleteTimelineFlow::run(&self, timeline_id, false).await?; + DeleteTimelineFlow::run(&self, timeline_id).await?; Ok(()) } diff --git a/pageserver/src/tenant/layer_map.rs b/pageserver/src/tenant/layer_map.rs index 6f150a2d5c..ba9c08f6e7 100644 --- a/pageserver/src/tenant/layer_map.rs +++ b/pageserver/src/tenant/layer_map.rs @@ -51,7 +51,8 @@ use crate::keyspace::KeyPartitioning; use crate::repository::Key; use crate::tenant::storage_layer::InMemoryLayer; use anyhow::Result; -use pageserver_api::keyspace::KeySpaceAccum; +use pageserver_api::keyspace::{KeySpace, KeySpaceAccum}; +use range_set_blaze::{CheckSortedDisjoint, RangeSetBlaze}; use std::collections::{HashMap, VecDeque}; use std::iter::Peekable; use std::ops::Range; @@ -61,7 +62,7 @@ use utils::lsn::Lsn; use historic_layer_coverage::BufferedHistoricLayerCoverage; pub use historic_layer_coverage::LayerKey; -use super::storage_layer::PersistentLayerDesc; +use super::storage_layer::{LayerVisibilityHint, PersistentLayerDesc}; /// /// LayerMap tracks what layers exist on a timeline. @@ -871,11 +872,183 @@ impl LayerMap { println!("End dump LayerMap"); Ok(()) } + + /// `read_points` represent the tip of a timeline and any branch points, i.e. the places + /// where we expect to serve reads. + /// + /// This function is O(N) and should be called infrequently. The caller is responsible for + /// looking up and updating the Layer objects for these layer descriptors. + pub fn get_visibility( + &self, + mut read_points: Vec, + ) -> ( + Vec<(Arc, LayerVisibilityHint)>, + KeySpace, + ) { + // This is like a KeySpace, but this type is intended for efficient unions with image layer ranges, whereas + // KeySpace is intended to be composed statically and iterated over. + struct KeyShadow { + // Map of range start to range end + inner: RangeSetBlaze, + } + + impl KeyShadow { + fn new() -> Self { + Self { + inner: Default::default(), + } + } + + fn contains(&self, range: Range) -> bool { + let range_incl = range.start.to_i128()..=range.end.to_i128() - 1; + self.inner.is_superset(&RangeSetBlaze::from_sorted_disjoint( + CheckSortedDisjoint::from([range_incl]), + )) + } + + /// Add the input range to the keys covered by self. + /// + /// Return true if inserting this range covered some keys that were previously not covered + fn cover(&mut self, insert: Range) -> bool { + let range_incl = insert.start.to_i128()..=insert.end.to_i128() - 1; + self.inner.ranges_insert(range_incl) + } + + fn reset(&mut self) { + self.inner = Default::default(); + } + + fn to_keyspace(&self) -> KeySpace { + let mut accum = KeySpaceAccum::new(); + for range_incl in self.inner.ranges() { + let range = Range { + start: Key::from_i128(*range_incl.start()), + end: Key::from_i128(range_incl.end() + 1), + }; + accum.add_range(range) + } + + accum.to_keyspace() + } + } + + // The 'shadow' will be updated as we sweep through the layers: an image layer subtracts from the shadow, + // and a ReadPoint + read_points.sort_by_key(|rp| rp.0); + let mut shadow = KeyShadow::new(); + + // We will interleave all our read points and layers into a sorted collection + enum Item { + ReadPoint { lsn: Lsn }, + Layer(Arc), + } + + let mut items = Vec::with_capacity(self.historic.len() + read_points.len()); + items.extend(self.iter_historic_layers().map(Item::Layer)); + items.extend( + read_points + .into_iter() + .map(|rp| Item::ReadPoint { lsn: rp }), + ); + + // Ordering: we want to iterate like this: + // 1. Highest LSNs first + // 2. Consider images before deltas if they end at the same LSNs (images cover deltas) + // 3. Consider ReadPoints before image layers if they're at the same LSN (readpoints make that image visible) + items.sort_by_key(|item| { + std::cmp::Reverse(match item { + Item::Layer(layer) => { + if layer.is_delta() { + (Lsn(layer.get_lsn_range().end.0 - 1), 0) + } else { + (layer.image_layer_lsn(), 1) + } + } + Item::ReadPoint { lsn } => (*lsn, 2), + }) + }); + + let mut results = Vec::with_capacity(self.historic.len()); + + let mut maybe_covered_deltas: Vec> = Vec::new(); + + for item in items { + let (reached_lsn, is_readpoint) = match &item { + Item::ReadPoint { lsn } => (lsn, true), + Item::Layer(layer) => (&layer.lsn_range.start, false), + }; + maybe_covered_deltas.retain(|d| { + if *reached_lsn >= d.lsn_range.start && is_readpoint { + // We encountered a readpoint within the delta layer: it is visible + + results.push((d.clone(), LayerVisibilityHint::Visible)); + false + } else if *reached_lsn < d.lsn_range.start { + // We passed the layer's range without encountering a read point: it is not visible + results.push((d.clone(), LayerVisibilityHint::Covered)); + false + } else { + // We're still in the delta layer: continue iterating + true + } + }); + + match item { + Item::ReadPoint { lsn: _lsn } => { + // TODO: propagate the child timeline's shadow from their own run of this function, so that we don't have + // to assume that the whole key range is visible at the branch point. + shadow.reset(); + } + Item::Layer(layer) => { + let visibility = if layer.is_delta() { + if shadow.contains(layer.get_key_range()) { + // If a layer isn't visible based on current state, we must defer deciding whether + // it is truly not visible until we have advanced past the delta's range: we might + // encounter another branch point within this delta layer's LSN range. + maybe_covered_deltas.push(layer); + continue; + } else { + LayerVisibilityHint::Visible + } + } else { + let modified = shadow.cover(layer.get_key_range()); + if modified { + // An image layer in a region which wasn't fully covered yet: this layer is visible, but layers below it will be covered + LayerVisibilityHint::Visible + } else { + // An image layer in a region that was already covered + LayerVisibilityHint::Covered + } + }; + + results.push((layer, visibility)); + } + } + } + + // Drain any remaining maybe_covered deltas + results.extend( + maybe_covered_deltas + .into_iter() + .map(|d| (d, LayerVisibilityHint::Covered)), + ); + + (results, shadow.to_keyspace()) + } } #[cfg(test)] mod tests { - use pageserver_api::keyspace::KeySpace; + use crate::tenant::{storage_layer::LayerName, IndexPart}; + use pageserver_api::{ + key::DBDIR_KEY, + keyspace::{KeySpace, KeySpaceRandomAccum}, + }; + use std::{collections::HashMap, path::PathBuf}; + use utils::{ + id::{TenantId, TimelineId}, + shard::TenantShardId, + }; use super::*; @@ -1002,4 +1175,299 @@ mod tests { } } } + + #[test] + fn layer_visibility_basic() { + // A simple synthetic input, as a smoke test. + let tenant_shard_id = TenantShardId::unsharded(TenantId::generate()); + let timeline_id = TimelineId::generate(); + let mut layer_map = LayerMap::default(); + let mut updates = layer_map.batch_update(); + + const FAKE_LAYER_SIZE: u64 = 1024; + + let inject_delta = |updates: &mut BatchedUpdates, + key_start: i128, + key_end: i128, + lsn_start: u64, + lsn_end: u64| { + let desc = PersistentLayerDesc::new_delta( + tenant_shard_id, + timeline_id, + Range { + start: Key::from_i128(key_start), + end: Key::from_i128(key_end), + }, + Range { + start: Lsn(lsn_start), + end: Lsn(lsn_end), + }, + 1024, + ); + updates.insert_historic(desc.clone()); + desc + }; + + let inject_image = + |updates: &mut BatchedUpdates, key_start: i128, key_end: i128, lsn: u64| { + let desc = PersistentLayerDesc::new_img( + tenant_shard_id, + timeline_id, + Range { + start: Key::from_i128(key_start), + end: Key::from_i128(key_end), + }, + Lsn(lsn), + FAKE_LAYER_SIZE, + ); + updates.insert_historic(desc.clone()); + desc + }; + + // + // Construct our scenario: the following lines go in backward-LSN order, constructing the various scenarios + // we expect to handle. You can follow these examples through in the same order as they would be processed + // by the function under test. + // + + let mut read_points = vec![Lsn(1000)]; + + // A delta ahead of any image layer + let ahead_layer = inject_delta(&mut updates, 10, 20, 101, 110); + + // An image layer is visible and covers some layers beneath itself + let visible_covering_img = inject_image(&mut updates, 5, 25, 99); + + // A delta layer covered by the image layer: should be covered + let covered_delta = inject_delta(&mut updates, 10, 20, 90, 100); + + // A delta layer partially covered by an image layer: should be visible + let partially_covered_delta = inject_delta(&mut updates, 1, 7, 90, 100); + + // A delta layer not covered by an image layer: should be visible + let not_covered_delta = inject_delta(&mut updates, 1, 4, 90, 100); + + // An image layer covered by the image layer above: should be covered + let covered_image = inject_image(&mut updates, 10, 20, 89); + + // An image layer partially covered by an image layer: should be visible + let partially_covered_image = inject_image(&mut updates, 1, 7, 89); + + // An image layer not covered by an image layer: should be visible + let not_covered_image = inject_image(&mut updates, 1, 4, 89); + + // A read point: this will make subsequent layers below here visible, even if there are + // more recent layers covering them. + read_points.push(Lsn(80)); + + // A delta layer covered by an earlier image layer, but visible to a readpoint below that covering layer + let covered_delta_below_read_point = inject_delta(&mut updates, 10, 20, 70, 79); + + // A delta layer whose end LSN is covered, but where a read point is present partway through its LSN range: + // the read point should make it visible, even though its end LSN is covered + let covering_img_between_read_points = inject_image(&mut updates, 10, 20, 69); + let covered_delta_between_read_points = inject_delta(&mut updates, 10, 15, 67, 69); + read_points.push(Lsn(65)); + let covered_delta_intersects_read_point = inject_delta(&mut updates, 15, 20, 60, 69); + + let visible_img_after_last_read_point = inject_image(&mut updates, 10, 20, 65); + + updates.flush(); + + let (layer_visibilities, shadow) = layer_map.get_visibility(read_points); + let layer_visibilities = layer_visibilities.into_iter().collect::>(); + + assert_eq!( + layer_visibilities.get(&ahead_layer), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&visible_covering_img), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&covered_delta), + Some(&LayerVisibilityHint::Covered) + ); + assert_eq!( + layer_visibilities.get(&partially_covered_delta), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(¬_covered_delta), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&covered_image), + Some(&LayerVisibilityHint::Covered) + ); + assert_eq!( + layer_visibilities.get(&partially_covered_image), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(¬_covered_image), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&covered_delta_below_read_point), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&covering_img_between_read_points), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&covered_delta_between_read_points), + Some(&LayerVisibilityHint::Covered) + ); + assert_eq!( + layer_visibilities.get(&covered_delta_intersects_read_point), + Some(&LayerVisibilityHint::Visible) + ); + assert_eq!( + layer_visibilities.get(&visible_img_after_last_read_point), + Some(&LayerVisibilityHint::Visible) + ); + + // Shadow should include all the images below the last read point + let expected_shadow = KeySpace { + ranges: vec![Key::from_i128(10)..Key::from_i128(20)], + }; + assert_eq!(shadow, expected_shadow); + } + + fn fixture_path(relative: &str) -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(relative) + } + + #[test] + fn layer_visibility_realistic() { + // Load a large example layermap + let index_raw = std::fs::read_to_string(fixture_path( + "test_data/indices/mixed_workload/index_part.json", + )) + .unwrap(); + let index: IndexPart = serde_json::from_str::(&index_raw).unwrap(); + + let tenant_id = TenantId::generate(); + let tenant_shard_id = TenantShardId::unsharded(tenant_id); + let timeline_id = TimelineId::generate(); + + let mut layer_map = LayerMap::default(); + let mut updates = layer_map.batch_update(); + for (layer_name, layer_metadata) in index.layer_metadata { + let layer_desc = match layer_name { + LayerName::Image(layer_name) => PersistentLayerDesc { + key_range: layer_name.key_range.clone(), + lsn_range: layer_name.lsn_as_range(), + tenant_shard_id, + timeline_id, + is_delta: false, + file_size: layer_metadata.file_size, + }, + LayerName::Delta(layer_name) => PersistentLayerDesc { + key_range: layer_name.key_range, + lsn_range: layer_name.lsn_range, + tenant_shard_id, + timeline_id, + is_delta: true, + file_size: layer_metadata.file_size, + }, + }; + updates.insert_historic(layer_desc); + } + updates.flush(); + + let read_points = vec![index.metadata.disk_consistent_lsn()]; + let (layer_visibilities, shadow) = layer_map.get_visibility(read_points); + for (layer_desc, visibility) in &layer_visibilities { + tracing::info!("{layer_desc:?}: {visibility:?}"); + eprintln!("{layer_desc:?}: {visibility:?}"); + } + + // The shadow should be non-empty, since there were some image layers + assert!(!shadow.ranges.is_empty()); + + // At least some layers should be marked covered + assert!(layer_visibilities + .iter() + .any(|i| matches!(i.1, LayerVisibilityHint::Covered))); + + let layer_visibilities = layer_visibilities.into_iter().collect::>(); + + // Brute force validation: a layer should be marked covered if and only if there are image layers above it in LSN order which cover it + for (layer_desc, visible) in &layer_visibilities { + let mut coverage = KeySpaceRandomAccum::new(); + let mut covered_by = Vec::new(); + + for other_layer in layer_map.iter_historic_layers() { + if &other_layer == layer_desc { + continue; + } + if !other_layer.is_delta() + && other_layer.image_layer_lsn() >= Lsn(layer_desc.get_lsn_range().end.0 - 1) + && other_layer.key_range.start <= layer_desc.key_range.end + && layer_desc.key_range.start <= other_layer.key_range.end + { + coverage.add_range(other_layer.get_key_range()); + covered_by.push((*other_layer).clone()); + } + } + let coverage = coverage.to_keyspace(); + + let expect_visible = if coverage.ranges.len() == 1 + && coverage.contains(&layer_desc.key_range.start) + && coverage.contains(&Key::from_i128(layer_desc.key_range.end.to_i128() - 1)) + { + LayerVisibilityHint::Covered + } else { + LayerVisibilityHint::Visible + }; + + if expect_visible != *visible { + eprintln!( + "Layer {}..{} @ {}..{} (delta={}) is {visible:?}, should be {expect_visible:?}", + layer_desc.key_range.start, + layer_desc.key_range.end, + layer_desc.lsn_range.start, + layer_desc.lsn_range.end, + layer_desc.is_delta() + ); + if expect_visible == LayerVisibilityHint::Covered { + eprintln!("Covered by:"); + for other in covered_by { + eprintln!( + " {}..{} @ {}", + other.get_key_range().start, + other.get_key_range().end, + other.image_layer_lsn() + ); + } + if let Some(range) = coverage.ranges.first() { + eprintln!( + "Total coverage from contributing layers: {}..{}", + range.start, range.end + ); + } else { + eprintln!( + "Total coverage from contributing layers: {:?}", + coverage.ranges + ); + } + } + } + assert_eq!(expect_visible, *visible); + } + + // Sanity: the layer that holds latest data for the DBDIR key should always be visible + // (just using this key as a key that will always exist for any layermap fixture) + let dbdir_layer = layer_map + .search(DBDIR_KEY, index.metadata.disk_consistent_lsn()) + .unwrap(); + assert!(matches!( + layer_visibilities.get(&dbdir_layer.layer).unwrap(), + LayerVisibilityHint::Visible + )); + } } diff --git a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs index 347490c1ba..136f68bc36 100644 --- a/pageserver/src/tenant/layer_map/historic_layer_coverage.rs +++ b/pageserver/src/tenant/layer_map/historic_layer_coverage.rs @@ -521,6 +521,10 @@ impl BufferedHistoricLayerCoverage { Ok(&self.historic_coverage) } + + pub(crate) fn len(&self) -> usize { + self.layers.len() + } } #[test] diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index f931341aca..4fd110359b 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -451,20 +451,14 @@ pub enum ValueReconstructResult { /// than an authoritative value, so that we do not have to update it synchronously when changing the visibility /// of layers (for example when creating a branch that makes some previously covered layers visible). It should /// be used for cache management but not for correctness-critical checks. -#[derive(Default, Debug, Clone, PartialEq, Eq)] -pub(crate) enum LayerVisibilityHint { +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum LayerVisibilityHint { /// A Visible layer might be read while serving a read, because there is not an image layer between it /// and a readable LSN (the tip of the branch or a child's branch point) Visible, /// A Covered layer probably won't be read right now, but _can_ be read in future if someone creates /// a branch or ephemeral endpoint at an LSN below the layer that covers this. - #[allow(unused)] Covered, - /// Calculating layer visibilty requires I/O, so until this has happened layers are loaded - /// in this state. Note that newly written layers may be called Visible immediately, this uninitialized - /// state is for when existing layers are constructed while loading a timeline. - #[default] - Uninitialized, } pub(crate) struct LayerAccessStats(std::sync::atomic::AtomicU64); @@ -626,23 +620,30 @@ impl LayerAccessStats { } } - pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) { - let value = match visibility { - LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT, - LayerVisibilityHint::Covered | LayerVisibilityHint::Uninitialized => 0x0, - }; - - self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value); - } - - pub(crate) fn visibility(&self) -> LayerVisibilityHint { - let read = self.0.load(std::sync::atomic::Ordering::Relaxed); - match (read >> Self::VISIBILITY_SHIFT) & 0x1 { + /// Helper for extracting the visibility hint from the literal value of our inner u64 + fn decode_visibility(&self, bits: u64) -> LayerVisibilityHint { + match (bits >> Self::VISIBILITY_SHIFT) & 0x1 { 1 => LayerVisibilityHint::Visible, 0 => LayerVisibilityHint::Covered, _ => unreachable!(), } } + + /// Returns the old value which has been replaced + pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) -> LayerVisibilityHint { + let value = match visibility { + LayerVisibilityHint::Visible => 0x1 << Self::VISIBILITY_SHIFT, + LayerVisibilityHint::Covered => 0x0, + }; + + let old_bits = self.write_bits(0x1 << Self::VISIBILITY_SHIFT, value); + self.decode_visibility(old_bits) + } + + pub(crate) fn visibility(&self) -> LayerVisibilityHint { + let read = self.0.load(std::sync::atomic::Ordering::Relaxed); + self.decode_visibility(read) + } } /// Get a layer descriptor from a layer. diff --git a/pageserver/src/tenant/storage_layer/layer.rs b/pageserver/src/tenant/storage_layer/layer.rs index 1075feb1d1..5732779e44 100644 --- a/pageserver/src/tenant/storage_layer/layer.rs +++ b/pageserver/src/tenant/storage_layer/layer.rs @@ -24,7 +24,8 @@ use super::delta_layer::{self, DeltaEntry}; use super::image_layer::{self}; use super::{ AsLayerDesc, ImageLayerWriter, LayerAccessStats, LayerAccessStatsReset, LayerName, - PersistentLayerDesc, ValueReconstructResult, ValueReconstructState, ValuesReconstructState, + LayerVisibilityHint, PersistentLayerDesc, ValueReconstructResult, ValueReconstructState, + ValuesReconstructState, }; use utils::generation::Generation; @@ -246,7 +247,7 @@ impl Layer { &timeline.generation, ); - let layer = LayerInner::new( + LayerInner::new( conf, timeline, local_path, @@ -254,14 +255,7 @@ impl Layer { Some(inner), timeline.generation, timeline.get_shard_index(), - ); - - // Newly created layers are marked visible by default: the usual case is that they were created to be read. - layer - .access_stats - .set_visibility(super::LayerVisibilityHint::Visible); - - layer + ) })); let downloaded = resident.expect("just initialized"); @@ -493,6 +487,32 @@ impl Layer { } } } + + pub(crate) fn set_visibility(&self, visibility: LayerVisibilityHint) { + let old_visibility = self.access_stats().set_visibility(visibility.clone()); + use LayerVisibilityHint::*; + match (old_visibility, visibility) { + (Visible, Covered) => { + // Subtract this layer's contribution to the visible size metric + if let Some(tl) = self.0.timeline.upgrade() { + tl.metrics + .visible_physical_size_gauge + .sub(self.0.desc.file_size) + } + } + (Covered, Visible) => { + // Add this layer's contribution to the visible size metric + if let Some(tl) = self.0.timeline.upgrade() { + tl.metrics + .visible_physical_size_gauge + .add(self.0.desc.file_size) + } + } + (Covered, Covered) | (Visible, Visible) => { + // no change + } + } + } } /// The download-ness ([`DownloadedLayer`]) can be either resident or wanted evicted. @@ -693,6 +713,13 @@ impl Drop for LayerInner { timeline.metrics.layer_count_image.dec(); timeline.metrics.layer_size_image.sub(self.desc.file_size); } + + if matches!(self.access_stats.visibility(), LayerVisibilityHint::Visible) { + timeline + .metrics + .visible_physical_size_gauge + .sub(self.desc.file_size); + } } if !*self.wanted_deleted.get_mut() { @@ -801,6 +828,12 @@ impl LayerInner { timeline.metrics.layer_size_image.add(desc.file_size); } + // New layers are visible by default. This metric is later updated on drop or in set_visibility + timeline + .metrics + .visible_physical_size_gauge + .add(desc.file_size); + LayerInner { conf, debug_str: { diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 3a7353c138..37ebeded66 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -2736,6 +2736,10 @@ impl Timeline { // Tenant::create_timeline will wait for these uploads to happen before returning, or // on retry. + // Now that we have the full layer map, we may calculate the visibility of layers within it (a global scan) + drop(guard); // drop write lock, update_layer_visibility will take a read lock. + self.update_layer_visibility().await; + info!( "loaded layer map with {} layers at {}, total physical size: {}", num_layers, disk_consistent_lsn, total_physical_size @@ -4677,27 +4681,6 @@ impl Timeline { } } - // The writer.finish() above already did the fsync of the inodes. - // We just need to fsync the directory in which these inodes are linked, - // which we know to be the timeline directory. - if !image_layers.is_empty() { - // We use fatal_err() below because the after writer.finish() returns with success, - // the in-memory state of the filesystem already has the layer file in its final place, - // and subsequent pageserver code could think it's durable while it really isn't. - let timeline_dir = VirtualFile::open( - &self - .conf - .timeline_path(&self.tenant_shard_id, &self.timeline_id), - ctx, - ) - .await - .fatal_err("VirtualFile::open for timeline dir fsync"); - timeline_dir - .sync_all() - .await - .fatal_err("VirtualFile::sync_all timeline dir"); - } - let mut guard = self.layers.write().await; // FIXME: we could add the images to be uploaded *before* returning from here, but right @@ -4706,6 +4689,9 @@ impl Timeline { drop_wlock(guard); timer.stop_and_record(); + // Creating image layers may have caused some previously visible layers to be covered + self.update_layer_visibility().await; + Ok(image_layers) } diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 5e9ff1c9e4..4fe9bbafab 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -443,6 +443,45 @@ impl Timeline { Ok(()) } + /// Update the LayerVisibilityHint of layers covered by image layers, based on whether there is + /// an image layer between them and the most recent readable LSN (branch point or tip of timeline). The + /// purpose of the visibility hint is to record which layers need to be available to service reads. + /// + /// The result may be used as an input to eviction and secondary downloads to de-prioritize layers + /// that we know won't be needed for reads. + pub(super) async fn update_layer_visibility(&self) { + let head_lsn = self.get_last_record_lsn(); + + // We will sweep through layers in reverse-LSN order. We only do historic layers. L0 deltas + // are implicitly left visible, because LayerVisibilityHint's default is Visible, and we never modify it here. + // Note that L0 deltas _can_ be covered by image layers, but we consider them 'visible' because we anticipate that + // they will be subject to L0->L1 compaction in the near future. + let layer_manager = self.layers.read().await; + let layer_map = layer_manager.layer_map(); + + let readable_points = { + let children = self.gc_info.read().unwrap().retain_lsns.clone(); + + let mut readable_points = Vec::with_capacity(children.len() + 1); + for (child_lsn, _child_timeline_id) in &children { + readable_points.push(*child_lsn); + } + readable_points.push(head_lsn); + readable_points + }; + + let (layer_visibility, covered) = layer_map.get_visibility(readable_points); + for (layer_desc, visibility) in layer_visibility { + // FIXME: a more efficiency bulk zip() through the layers rather than NlogN getting each one + let layer = layer_manager.get_from_desc(&layer_desc); + layer.set_visibility(visibility); + } + + // TODO: publish our covered KeySpace to our parent, so that when they update their visibility, they can + // avoid assuming that everything at a branch point is visible. + drop(covered); + } + /// Collect a bunch of Level 0 layer files, and compact and reshuffle them as /// as Level 1 files. Returns whether the L0 layers are fully compacted. async fn compact_level0( diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index ab6a5f20ba..9b2403f899 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -206,11 +206,10 @@ impl DeleteTimelineFlow { // NB: If this fails half-way through, and is retried, the retry will go through // all the same steps again. Make sure the code here is idempotent, and don't // error out if some of the shutdown tasks have already been completed! - #[instrument(skip_all, fields(%inplace))] + #[instrument(skip_all)] pub async fn run( tenant: &Arc, timeline_id: TimelineId, - inplace: bool, ) -> Result<(), DeleteTimelineError> { super::debug_assert_current_span_has_tenant_and_timeline_id(); @@ -235,11 +234,7 @@ impl DeleteTimelineFlow { ))? }); - if inplace { - Self::background(guard, tenant.conf, tenant, &timeline).await? - } else { - Self::schedule_background(guard, tenant.conf, Arc::clone(tenant), timeline); - } + Self::schedule_background(guard, tenant.conf, Arc::clone(tenant), timeline); Ok(()) } diff --git a/pageserver/test_data/indices/mixed_workload/README.md b/pageserver/test_data/indices/mixed_workload/README.md new file mode 100644 index 0000000000..724274fcd9 --- /dev/null +++ b/pageserver/test_data/indices/mixed_workload/README.md @@ -0,0 +1,7 @@ + +# This was captured from one shard of a large tenant in staging. + +# It has a mixture of deltas and image layers, >1000 layers in total. + +# This is suitable for general smoke tests that want an index which is not +# trivially small, but doesn't contain weird/pathological cases. diff --git a/pageserver/test_data/indices/mixed_workload/index_part.json b/pageserver/test_data/indices/mixed_workload/index_part.json new file mode 100644 index 0000000000..cb4bfc4726 --- /dev/null +++ b/pageserver/test_data/indices/mixed_workload/index_part.json @@ -0,0 +1 @@ +{"version":7,"layer_metadata":{"000000067F00004005000060F300069883DB-000000067F00004005000060F300069D13FA__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300039A4000-000000067F00004005000060F300039C0000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300039FC000-000000067F00004005000060F30003A0F066__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000082C0F1-000000067F000040050081DB43000086E169__000000A583FBFB91-000000A9EB8C4489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000478000-000000067F00004005000060F3000047C000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000012C000-000000067F00004005000060F300001F0000__0000018624969468":{"file_size":134422528,"generation":7,"shard":"0008"},"000000067F00004005000060F700019E8000-000000067F00004005000060F700019EC000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300018E0FE6-000000067F00004005000060F3000193A10B__00000075CC373F31-00000079F2A2F311":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016E85370000004000-030000000000000000000000000000000002__0000018613F0A050":{"file_size":14172160,"generation":3,"shard":"0008"},"000000067F00004005000060F300034847BD-000000067F00004005000060F300034BD86C__000000EBC9213D59-000000EFA7EAA9E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C80000-000000067F000040050081DB430000C84000__000000BDAFECFC00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100000CCBA0-000000067F00004005000060F20100000000__0000000D80565628":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000CA4000-000000067F00004005016EA00C0000CE0000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB00013BC000-000000067F00004005000060FB0001400000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001240000-000000067F00004005016EA00C0001244000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30004EC52E9-000000067F00004005000060F30004F1638A__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000E10000-000000067F000040050081DB430000E14000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000007F0F-000000067F0000400500EB4A480000037E20__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004FE8000-000000067F00004005000060F3000502905D__0000014784964B91-0000014B000D1821":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB43000072C000-000000067F000040050081DB430000768000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005E3B48F-000000067F00004005000060F30005EF454F__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A100000B7E04-030000000000000000000000000000000002__000000E7C2F1B249-000000EBC9213D59":{"file_size":30146560,"generation":2,"shard":"0008"},"000000067F0000400501025D90000009029B-000000067F0000400501025D950100000000__0000011B688FEDC8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A10000-000000067F000040050081DB430000A14000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002F5105E-000000067F00004005000060F30002F9A0EB__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000187FE22-000000067F000040050081D80C0100000000__00000075E5D2A930":{"file_size":59138048,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001E8000-000000067F000040050081DB4300001EC000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000184C000-000000067F00004005000060FB000187FE22__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005A16504-000000067F00004005000060F30005A57691__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100005C0000-000000067F00004005000060F100005C821A__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-000000067F00004005000060F00300000000__000001BCB572A4E0":{"file_size":2310144,"generation":17,"shard":"0008"},"000000067F00004005000060F30002214000-000000067F00004005000060F30002264247__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A10000110000-000000067F0000400500E3A2A10000114000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006864000-000000067F00004005000060F30006868000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000D0000-000000067F0000400500DBCED500000D4000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000274C000-000000067F00004005000060F30002790000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00009274AB-030000000000000000000000000000000002__000001935283F9B9-00000196C9018F59":{"file_size":60104704,"generation":11,"shard":"0008"},"000000067F0000400500C782E4000023D359-000000067F0000400500C782E400002A5E4B__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001780DB7-000000067F00004005000060F700017E1391__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F000040050081DB4300004E4000-000000067F000040050081DB4300004F8000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00018C0000-000000067F00004005016EA00C00018C4000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F300056DC000-000000067F00004005000060F300056E0000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001F14230-000000067F000040050081D80C0100000000__0000018613F0A050":{"file_size":59138048,"generation":3,"shard":"0008"},"000000067F00004005010F9F120000004000-030000000000000000000000000000000002__0000012E77D3BF00":{"file_size":105775104,"generation":2,"shard":"0008"},"000000067F00004005000060F30002D80000-000000067F00004005000060F30002D84000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000122BBF-000000067F00004005000060F7000013B18E__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002B10000-000000067F00004005000060F30002B88FF2__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006320C60-000000067F00004005000060F30006349DA2__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000079E393-000000067F00004005016EA00C00009BF728__00000196C9018F59-0000019A2EAFE7A9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500F67839000005C000-000000067F0000400500F67839000006AEF4__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D7F71A-030000000000000000000000000000000002__000001BA93C39481-000001BCB572A4E1":{"file_size":50880512,"generation":17,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C481-000001BCB572C5D9":{"file_size":24576,"generation":20,"shard":"0008"},"000000067F00004005000060F70001570000-000000067F00004005000060F70001574000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000042C000-000000067F00004005000060F30000478000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C5D9-000001BCB572DFF9":{"file_size":24576,"generation":22,"shard":"0008"},"000000067F00004005000060FB00015FCD31-030000000000000000000000000000000002__000000698F2C3A38":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30005C841ED-000000067F00004005000060F30005C95225__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001B4A119-000000067F00004005000060F30100000000__0000008196C976A1-0000008625CF2891":{"file_size":200990720,"generation":2,"shard":"0008"},"000000067F00004005000060F300019790A2-000000067F00004005000060F300019C2056__00000079F2A2F311-0000007E3A9BFD29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001838000-000000067F00004005000060FB000183C000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001C00FE1-000000067F00004005000060F30001C0A0A3__0000008625CF2891-00000089F4693119":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300056E0000-000000067F00004005000060F300056E4000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000BBD532-000000067F00004005000060F80100000000__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":96477184,"generation":2,"shard":"0008"},"000000067F00004005000060F30000F9B026-000000067F00004005000060F30100000000__00000047E31D98D1-0000004C49155071":{"file_size":173834240,"generation":2,"shard":"0008"},"000000067F000040050081DB430000500000-000000067F000040050081DB430000504000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004971675-000000067F00004005000060F300049B26A8__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003102107-000000067F00004005000060F300031130BC__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300048A4000-000000067F00004005000060F30004900000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004B8000-000000067F00004005016EA00C00004BC000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001A71688-000000067F00004005000060FB0001A8A1CD__0000007E3A9BFD29-0000008196C976A1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E60000-000000067F00004005000060F30000E64000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300023B0FF7-000000067F00004005000060F300024020ED__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00003F8000-000000067F00004005016EA00C00003FC000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30004B2B250-000000067F00004005000060F30004B5431C__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000050000-000000067F00004005000060F700000885C5__000000044854EBD1-00000008B6B51879":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB000097168A-030000000000000000000000000000000002__00000028C365FBE1-0000002D2A8E0B81":{"file_size":120299520,"generation":2,"shard":"0008"},"000000067F00004005000060F3000625C000-000000067F00004005000060F30006270000__0000017171761D90":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001BA8000-000000067F00004005000060FB0001BC0B44__0000008625CF2891-00000089F4693119":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003344134-000000067F00004005000060F3000336D193__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006B10FFF-000000067F00004005000060F30006B22072__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006E34000-000000067F00004005000060F30006E70000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000008238C-000000067F00004005000060F60100000000__00000139CF156B58":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000A30000-000000067F00004005000060F70100000000__0000009DF02C1241-000000A173C00489":{"file_size":269688832,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001CE16ED-000000067F000040050081D80C0100000000__0000008DDCD70B68":{"file_size":59138048,"generation":2,"shard":"0008"},"000000067F000040050081DB4300011B0000-000000067F000040050081DB4300011B4000__000000DBD29DC248":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C000010C0D1-000000067F0000400500F3A25C000011E137__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000004000-000000067F00004005000060F70000029ED0__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005000060F60000058F73-000000067F00004005000060F60100000000__000000E4D847F4E0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001C3F636-000000067F00004005016EA00C0001CC74D7__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500EB4A480000101089-000000067F0000400500EB4A48000012798C__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300007A8000-000000067F000040050081DB4300007AC000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F1000010043F-000000067F00004005000060F20100000000__0000000D55A212C9-000000114A805939":{"file_size":182878208,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001EAC000-000000067F00004005000060FB0001F14230__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000616F6B2-000000067F00004005000060F300061B8705__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005C9E3C4-000000067F00004005000060F30005CCF3C5__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001AA0000-000000067F00004005000060F70001AB05CB__0000015304A396B9-0000015670D6AFD9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F3000073C000-000000067F00004005000060F30000775A02__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300003AE21D-000000067F000040050081DB43000045029C__0000008DBE2855F9-000000923719A971":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B04000-000000067F00004005000060F70001B18000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E74000-000000067F00004005000060F30000E78000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000182C000-000000067F00004005000060F700018871D6__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000DE8B45-000000067F00004005000060FB0000DF968A__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E78000-000000067F00004005000060F30000E7C000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000140C000-030000000000000000000000000000000002__000000603CA8F2F0":{"file_size":89522176,"generation":2,"shard":"0008"},"000000067F00004005000060FB00011CA1CD-000000067F00004005000060FB00011F2D11__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000144FB4E-000000067F00004005016EA00C00014B79E7__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F700015A195C-000000067F00004005000060F80100000000__0000012E77D3BF00":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000FC0000-000000067F00004005000060F70000FC4000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000012798C-000000067F0000400500EB4A48000013F89B__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CE4000-000000067F00004005016EA00C0001D18000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F30005FC519A-000000067F00004005000060F30005FE621A__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000370000-000000067F00004005016EA00C0000374000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0001760000-000000067F00004005016EA00C0001764000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F100003A0000-000000067F00004005000060F100003B8214__0000003D03FCCDB9-000000417D21ACF9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300006B0000-000000067F00004005000060F300006B4000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00004E1FF6-030000000000000000000000000000000002__000000174479FC18":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F3000502905D-000000067F00004005000060F300050321C0__0000014784964B91-0000014B000D1821":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001AB05CB-000000067F00004005000060F70001AB8B97__0000015304A396B9-0000015670D6AFD9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000151F7C5-000000067F00004005016EA00C000158F667__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F70000B9C000-000000067F00004005000060F80100000000__000000AFE87558B0":{"file_size":83533824,"generation":2,"shard":"0008"},"000000067F00004005000060F7000141882A-000000067F00004005000060F80100000000__00000122E1129DA0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000018F5CD-000000067F0000400500EB4A48000019F4DD__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000196C000-000000067F00004005000060F70001990000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300029C623C-000000067F00004005000060F30100000000__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":81313792,"generation":2,"shard":"0008"},"000000067F00004005000060F300027C0000-000000067F00004005000060F300027C4000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FB3D300000001487-000000067F0000400500FB3D300100000000__0000010FB1BE19B9-00000113456156F1":{"file_size":24428544,"generation":2,"shard":"0008"},"000000067F00004005000060F300056D8000-000000067F00004005000060F300056DC000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700003C0000-000000067F00004005000060F700003C4000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000664E3CA-000000067F00004005000060F30100000000__000001715E483C79-000001751A7D7589":{"file_size":288645120,"generation":2,"shard":"0008"},"000000067F000040050100D04D000004B5AD-000000067F000040050100D04D00000634BB__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500DBCED5000002C000-000000067F0000400500DBCED50000078000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000C20000-000000067F00004005016EA00C0000C24000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70001B30000-000000067F00004005000060F70001B34000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700009C035C-000000067F00004005000060F80100000000__0000009A1ABDE921-0000009DF02C1241":{"file_size":264159232,"generation":2,"shard":"0008"},"000000067F00004005000060F30003B33945-000000067F00004005000060F30100000000__0000010FB1BE19B9-00000113456156F1":{"file_size":155344896,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000079FCFA-000000067F00004005016EA00C00007C7B9C__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500EB4A480000218000-000000067F0000400500EB4A48000021C000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005D1D0DC-000000067F00004005000060F30005D76250__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000149B774-000000067F00004005000060FB00014A42B8__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003D0B155-000000067F00004005000060F30003D14206__00000117EDA82C11-0000011B632CC319":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300020FC052-000000067F00004005000060F300021050B0__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002268000-000000067F00004005000060F300022B9050__000000A583FBFB91-000000A9EB8C4489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300004FC000-000000067F000040050081DB430000500000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300060A93B5-000000067F00004005000060F300060C2210__0000016834A3FC91-0000016B49A934C1":{"file_size":263479296,"generation":2,"shard":"0008"},"000000067F00004005000060F3000674C000-000000067F00004005000060F30006798000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300007F913A-030000000000000000000000000000000002__000000A5A3F27398":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000F4000-030000000000000000000000000000000002__000000E4D847F4E0":{"file_size":103907328,"generation":2,"shard":"0008"},"000000067F00004005000060F70001348000-000000067F00004005000060F70100000000__0000011B632CC319-0000011F1A40FA69":{"file_size":270753792,"generation":2,"shard":"0008"},"000000067F00004005000060F10000030000-000000067F00004005000060F20100000000__000000021DC73119-000000044854EBD1":{"file_size":267771904,"generation":2,"shard":"0008"},"000000067F000040050107B54701FFFFFFFF-000000067F000040050107B5470300000000__0000011F1A40FA69-00000122A7BB7B29":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30006674000-000000067F00004005000060F30006690000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050107B54701FFFFFFFF-000000067F000040050107B5470300000000__0000011B632CC319-0000011F1A40FA69":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30000298000-000000067F00004005000060F3000029C000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000F185D4-000000067F00004005000060F80100000000__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":249135104,"generation":2,"shard":"0008"},"000000067F00004005000060F300049CB712-000000067F00004005000060F30004A048A8__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700004B1E77-000000067F00004005000060F80100000000__00000047F1F2B800":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30004B00000-000000067F00004005000060F30004B1111A__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006D14000-000000067F00004005000060F30006D30000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00002D77AE-030000000000000000000000000000000002__000001880F984A29-0000018C496B6DB1":{"file_size":81018880,"generation":11,"shard":"0008"},"000000067F00004005000060F300002D0000-000000067F00004005000060F30000370FD1__0000000D55A212C9-000000114A805939":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D69D790000028000-000000067F0000400500D69D79000002C000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002170000-000000067F00004005000060F30002174000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000F59017-000000067F00004005000060F30000F91FFF__00000047E31D98D1-0000004C49155071":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F6000006A37A-000000067F00004005000060F60100000000__000001180B3FF408":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F6000002F012-000000067F00004005000060F60100000000__00000081AA3C40F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30005614000-000000067F00004005000060F30005688000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300036C8000-000000067F00004005000060F300036F91FE__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001ADF63C-030000000000000000000000000000000002__000001B3E1B95181-000001B6FFE46BC9":{"file_size":64421888,"generation":11,"shard":"0008"},"000000067F0000400500EB4A480000057D31-000000067F0000400500EB4A48000008FC41__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F58000-000000067F00004005016EA00C0000F5C000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000908000-000000067F000040050081DB43000094A076__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000471200E-000000067F00004005000060F3000474302B__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300000403DA-030000000000000000000000000000000002__00000075E5D2A930":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F60000079C4E-000000067F00004005000060F60100000000__0000012E77D3BF00":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500F67839000003C000-000000067F0000400500F678390000058000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001C80000-000000067F00004005000060FB0001C84000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300047F5138-000000067F00004005000060F3000480620C__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006B5C09E-000000067F00004005000060F30006BAD108__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001410F57-000000067F00004005000060F70001429534__00000122A7BB7B29-0000012694E36301":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00006B4000-000000067F00004005016EA00C00006E0000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700009605D8-000000067F00004005000060F80100000000__000000923719A971-00000096262826C9":{"file_size":251338752,"generation":2,"shard":"0008"},"000000067F00004005000060F70000C8CD0C-000000067F00004005000060F80100000000__000000BAC0041E18":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700012B8000-000000067F00004005000060F80100000000__00000113456156F1-00000117EDA82C11":{"file_size":265781248,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000049C000-000000067F00004005016EA00C00004A8000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70000C78000-000000067F00004005000060F70000C7C000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006B4B0BB-000000067F00004005000060F30006B5C09E__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001844000-000000067F00004005000060FB0001848000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300067F0000-000000067F00004005000060F300067F4000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C80000-000000067F00004005000060F30004C84000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A4C000-000000067F00004005000060F30002A98000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002480000-000000067F00004005000060F30002484000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000306A02D-000000067F00004005000060F30100000000__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":191299584,"generation":2,"shard":"0008"},"000000067F00004005000060F70001510000-000000067F00004005000060F70001514000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005BDB15B-000000067F00004005000060F30005C841ED__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001E98000-000000067F00004005000060FB0001E9C000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300057942F4-000000067F00004005000060F300057DD292__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005698000-000000067F00004005000060F3000569C000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002983166-000000067F00004005000060F3000299C28F__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000C24000-000000067F00004005016EA00C0000CA0000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F300033D7D7C-000000067F00004005000060F30003458D42__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A1C000-000000067F000040050081DB430000A30379__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002D93639-000000067F00004005000060F50100000000__000000D037B2DBD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000029C195-000000067F00004005016EA00C000029C196__000001BA93C39481-000001BCB572A4E1":{"file_size":32768,"generation":17,"shard":"0008"},"000000067F00004005000060F30000A5F9BB-000000067F00004005000060F60100000000__000000321AA80270":{"file_size":81657856,"generation":2,"shard":"0008"},"000000067F00004005000060F30002D84000-000000067F00004005000060F30002D93639__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005D1C000-000000067F00004005000060F30005D70000__000001684518AF20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010C8000-000000067F000040050081DB4300010E2072__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB43000058AF5E-000000067F000040050081DB4300005BCFD7__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000034611E-000000067F00004005000060F80100000000__000000321AA80270":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300000C1095-000000067F00004005000060F60100000000__000000021DC73119-000000044854EBD1":{"file_size":220635136,"generation":2,"shard":"0008"},"000000067F00004005000060FB000183C000-000000067F00004005000060FB0001840000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006C8729E-000000067F00004005000060F30006C98340__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005138000-000000067F00004005000060F3000513C000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300053E30C3-000000067F00004005000060F300053F40CC__0000014EC58A4A79-0000015304A396B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB43000002C000-000000067F000040050081DB4300000403DA__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004970000-000000067F00004005000060F30004974000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003C08000-000000067F00004005000060F30003C0C000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000103AD12-000000067F00004005000060FB000104B856__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004AC000-000000067F00004005016EA00C00004B8000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000DB7D33-000000067F00004005016EA00C0000E47BD2__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30001F30000-000000067F00004005000060F30001F34000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050109FFA2000000C000-030000000000000000000000000000000002__000001180B3FF408":{"file_size":70516736,"generation":2,"shard":"0008"},"000000067F00004005000060F700017405D4-000000067F00004005000060F70001758B92__000001398B56A519-0000013C9C0E3339":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300030B0000-000000067F00004005000060F300030C0FE5__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005010660F501FFFFFFFF-000000067F00004005010660F50300000000__00000122A7BB7B29-0000012694E36301":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002168000-000000067F00004005000060F3000216C000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F60000046A83-000000067F00004005000060F60100000000__000000BAC0041E18":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001368000-000000067F00004005000060FB000136C000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000184000-000000067F00004005000060F80100000000__000000174479FC18":{"file_size":93143040,"generation":2,"shard":"0008"},"000000067F00004005000060FB00012A8000-000000067F00004005000060FB0100000000__00000057593D8169-0000005C01565329":{"file_size":273711104,"generation":2,"shard":"0008"},"000000067F00004005000060F700007B0000-000000067F00004005000060F700007D05C8__00000075CC373F31-00000079F2A2F311":{"file_size":268468224,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001680B45-000000067F00004005000060FB000169968A__000000698AF6E809-0000006DDB29D589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300050CC000-000000067F00004005000060F300050E8000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-000000067F00004005000060F00300000000__0000018613F0A050":{"file_size":2310144,"generation":3,"shard":"0008"},"000000067F00004005000060F70001B1C000-000000067F00004005000060F70001B30000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000F50000-000000067F00004005000060F70000F705D6__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F000040050109CD330100000000-000000067F000040050109FFA2000000C000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800001FC000-000000067F0000400500EB4A480000200000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000240B12A-000000067F00004005000060F300024440AE__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000008228D-000000067F00004005000060F60100000000__000000027AF9D7D0":{"file_size":24576,"generation":1,"shard":"0008"},"000000067F00004005016EA00C000042C000-000000067F00004005016EA00C0000478000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB0000FF8000-000000067F00004005000060FB0001000B44__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000169968A-000000067F00004005000060FB00016D21CF__000000698AF6E809-0000006DDB29D589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100005F821C-000000067F00004005000060F20100000000__000000636DE92159-000000663565F8C9":{"file_size":149954560,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D7C000-000000067F00004005016EA00C0001E03DD8__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F0000400500F678390000058000-000000067F0000400500F67839000005C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800003A7E20-000000067F0000400500EB4A4800003BFD31__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001228000-000000067F00004005016EA00C000122C000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000F0C0E9-000000067F000040050081DB430000F4E15B__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000758000-000000067F00004005000060F80100000000__0000006DDB29D589-000000722F474369":{"file_size":264781824,"generation":2,"shard":"0008"},"000000067F00004005000060F300068640AF-000000067F00004005000060F3000686D0DE__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000047C000-000000067F00004005016EA00C0000498000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30006166575-000000067F00004005000060F3000616F6B2__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B18000-000000067F00004005000060F70001B1C000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700016EC000-000000067F00004005000060F70001708000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005CCF3C5-000000067F00004005000060F30005D184F6__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002848000-000000067F00004005000060F3000285901B__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300039C0000-000000067F00004005000060F300039C4000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002464000-000000067F00004005000060F30002480000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00011D0000-000000067F00004005016EA00C00011D4000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003D44283-000000067F00004005000060F30003D952B0__0000011B632CC319-0000011F1A40FA69":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480100000000-000000067F0000400500EE16BC0000044000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000533205E-000000067F00004005000060F300053E30C3__0000014EC58A4A79-0000015304A396B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F6000009A255-000000067F00004005000060F60300000000__0000017CC2FD7288":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B00000-000000067F00004005000060F70001B04000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004958000-000000067F00004005000060F3000495C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000518000-000000067F00004005000060F80100000000__0000004C49155071-0000004F31878919":{"file_size":262373376,"generation":2,"shard":"0008"},"000000067F00004005000060F300064D8000-000000067F00004005000060F3000658113F__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FDA1F80000014000-000000067F0000400500FDA1F80000020D42__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000284000-000000067F00004005000060FB00002D4B6A__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000CDBB9C-000000067F00004005000060F80100000000__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":148865024,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001298000-000000067F00004005016EA00C000129C000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001DD8000-000000067F00004005000060FB0001DF0B43__000000923719A971-00000096262826C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001220000-000000067F00004005000060F70001224000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002908000-000000067F00004005000060F30002920FA0__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F5C000-000000067F00004005016EA00C0000F90000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0001E03DD8-030000000000000000000000000000000002__000001BCB572A4E0":{"file_size":139264,"generation":17,"shard":"0008"},"000000067F00004005000060F30003998000-000000067F00004005000060F3000399C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00014E75C6-030000000000000000000000000000000002__000001A931C135B1-000001AC25760149":{"file_size":51486720,"generation":11,"shard":"0008"},"000000067F00004005010660F500000F44CB-000000067F00004005010660F70100000000__000001180B3FF408":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00003FC000-000000067F00004005016EA00C0000400000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003810000-000000067F00004005000060F30003849093__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006B00000-000000067F00004005000060F30006B10FFF__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001541688-000000067F00004005000060FB000154A1CD__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001098000-000000067F00004005000060FB000109C000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700011912D4-000000067F00004005000060F80100000000__00000104BD37F348":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A40000-000000067F00004005000060F30002A44000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001448000-000000067F00004005000060F300014B0F7B__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001009688-000000067F00004005000060FB000102A1CE__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00001A4000-000000067F0000400500EE16BC00001E0000__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000B58B45-000000067F00004005000060FB0000B6168A__0000003579F03331-0000003959DA2DE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000AC000-000000067F0000400500D69D7900000BDAF5__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000193A10B-000000067F00004005000060F30100000000__00000075CC373F31-00000079F2A2F311":{"file_size":198148096,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00005A0000-000000067F00004005016EA00C00005A4000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700000E0000-000000067F00004005000060F80100000000__0000000D80565628":{"file_size":112009216,"generation":2,"shard":"0008"},"000000067F00004005000060F3000690F2FD-000000067F00004005000060F300069883DB__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300004C6B83-000000067F00004005000060F60100000000__000000174479FC18":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001E18000-000000067F00004005000060F30001E50FF3__000000923719A971-00000096262826C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300043B4000-000000067F00004005000060F300043B8000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100006C0000-000000067F00004005000060F20100000000__000000722F474369-00000075CC373F31":{"file_size":267665408,"generation":2,"shard":"0008"},"000000067F00004005000060F70000A78000-000000067F00004005000060F70000A7C000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00011C1688-000000067F00004005000060FB00011CA1CD__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004E8000-000000067F00004005016EA00C00004EC000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000257A6F-000000067F00004005016EA00C000029F90B__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001590000-000000067F00004005000060FB0001594000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000193189A-030000000000000000000000000000000002__000001B3F17FE4E0":{"file_size":139264,"generation":11,"shard":"0008"},"000000067F00004005000060F300027C4000-000000067F00004005000060F30002828000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000B40000-000000067F00004005016EA00C0000B44000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30006694000-000000067F00004005000060F300066F0000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015C8000-000000067F00004005000060FB00015CC000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003B84000-000000067F00004005000060F30003B90000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006704000-000000067F00004005000060F30006748000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000793506-030000000000000000000000000000000002__0000002427BD8BD0":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30004F1638A-000000067F00004005000060F30100000000__000001440D3D0C69-0000014784964B91":{"file_size":93708288,"generation":2,"shard":"0008"},"000000067F00004005000060F80100000000-000000067F00004005000060FB0000014000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005000060F70000180000-000000067F00004005000060F70000184000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004A2693B-000000067F00004005000060F30004A7F98F__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002C71F27-000000067F00004005000060F30002C9AFB8__000000C824C09619-000000CC13D2E549":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300038075AF-000000067F00004005000060F30100000000__000000FF8B261599-000001048B25A8E9":{"file_size":49823744,"generation":2,"shard":"0008"},"000000067F0000400500DBCED50000028000-000000067F0000400500DBCED5000002C000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004188000-000000067F00004005000060F300041D9101__0000012694E36301-0000012A3F140591":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30006868000-000000067F00004005000060F50100000000__00000178C5D5D3A8":{"file_size":116645888,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A789A0-000000067F00004005000060F30003AB9907__0000010FB1BE19B9-00000113456156F1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000368000-000000067F0000400500EB4A48000036FF11__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300047EC0CA-000000067F00004005000060F300047F5138__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001AB8B97-000000067F00004005000060F70001AC115C__0000015304A396B9-0000015670D6AFD9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D61283-000000067F00004005000060F70000D8985C__000000C462B3C2A9-000000C824C09619":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300011D1111-000000067F00004005000060F3000122A1D5__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001967D34-000000067F00004005016EA00C000197FBD0__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500FA2AD3000004D85C-000000067F0000400500FB3D300100000000__0000010D77B487A0":{"file_size":31309824,"generation":2,"shard":"0008"},"000000067F000040050081DB4300005BCFD7-000000067F000040050081DB4300005D704F__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000004000-000000067F00004005000060F100000260F2__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F0000400500EE16BC00000F8000-000000067F0000400500EE16BC000014158C__000000F901689359-000000FCCD5238B1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000921E8A-000000067F00004005000060F60100000000__00000028C365FBE1-0000002D2A8E0B81":{"file_size":228564992,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001190000-000000067F00004005000060FB0001198B44__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300067A0000-000000067F00004005000060F300067A4000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000200000-000000067F00004005000060F10000204000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FF0FBB-000000067F00004005000060F3000407201D__00000122A7BB7B29-0000012694E36301":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000001C000-000000067F00004005000060F3000008228D__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005016EA00C0001CD7376-030000000000000000000000000000000002__000001B6FFE46BC9-000001BA93C39481":{"file_size":70238208,"generation":11,"shard":"0008"},"000000067F00004005000060FB0000EBC000-000000067F00004005000060FB0000EC8000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000293210E-000000067F00004005000060F30002983166__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000151F271-000000067F00004005000060F30100000000__000000636DE92159-000000663565F8C9":{"file_size":41271296,"generation":2,"shard":"0008"},"000000067F00004005000060F30004880000-000000067F00004005000060F30004884000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000518222-000000067F00004005000060F20100000000__0000005413AB3641-00000057593D8169":{"file_size":169492480,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00003E0000-000000067F00004005016EA00C00003E4000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30000775A02-000000067F00004005000060F60100000000__0000002427BD8BD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000197FBD0-000000067F00004005016EA00C00019C7A6A__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F3000067114B-000000067F00004005000060F60100000000__0000001B59EEB909-0000001FFBC01501":{"file_size":232669184,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001408000-000000067F00004005000060FB000140C000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800001F8000-000000067F0000400500EB4A4800001FC000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000290000-000000067F0000400500EB4A480000294000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003061089-000000067F00004005000060F3000306A02D__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CE4000-000000067F00004005000060F30001CF0197__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000E20000-000000067F00004005000060F70000E24000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001D0000-000000067F000040050081DB4300001D4000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005D184F6-000000067F00004005000060F30100000000__0000016143292911-00000164DEE06671":{"file_size":200163328,"generation":2,"shard":"0008"},"000000067F00004005000060F300066F4000-000000067F00004005000060F30006700000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A38000-000000067F000040050081DB430000A4A074__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000F38000-000000067F00004005000060F30000F59017__00000047E31D98D1-0000004C49155071":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C0C000-000000067F00004005000060FB0000C18000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006D34000-000000067F00004005000060F30006D60000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005010660F501FFFFFFFF-000000067F00004005010660F50300000000__0000011F1A40FA69-00000122A7BB7B29":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700013E85D1-000000067F00004005000060F70001410BBC__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000538B44-000000067F00004005000060FB0000551689__0000001737D88379-0000001B59EEB909":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001410000-000000067F00004005000060F70001414000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300032F1113-000000067F00004005000060F3000330A1C8__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004974000-000000067F00004005000060F3000498DC49__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000625EB45-000000067F00004005000060F30006277C61__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700019E8E81-000000067F00004005000060F80100000000__0000014EC58A4A79-0000015304A396B9":{"file_size":246792192,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB5730259-000001BCB5732691":{"file_size":24576,"generation":187,"shard":"0008"},"000000067F000040050081DB4300001CC000-000000067F000040050081DB4300001D0000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002C00000-000000067F00004005000060F30002C18FAE__000000C824C09619-000000CC13D2E549":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000FC4000-000000067F00004005000060F70000FCD85E__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000107C39B-030000000000000000000000000000000002__0000004C49155071-0000004F31878919":{"file_size":133349376,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F90000-000000067F00004005016EA00C0000F94000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000F98000-000000067F00004005016EA00C0000F9C000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700019EC000-000000067F00004005000060F80100000000__0000014EDD256548":{"file_size":7421952,"generation":2,"shard":"0008"},"000000067F00004005000060F300069FA3F6-000000067F00004005000060F30006A0B44C__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300003AC000-000000067F000040050081DB4300003B27DA__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005A57691-000000067F00004005000060F30005B00697__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300060CB2C8-000000067F00004005000060F300060D4415__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000495C000-000000067F00004005000060F30004970000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000D1C5F-000000067F0000400500D69D7900000F1B5B__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001358000-030000000000000000000000000000000002__000001A95031E5B8":{"file_size":21110784,"generation":11,"shard":"0008"},"000000067F00004005000060F3000430C000-000000067F00004005000060F30004370000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004904000-000000067F00004005000060F30004958000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000008000-000000067F00004005000060F30000378000__00000186146441F1-0000018624969469":{"file_size":33357824,"generation":6,"shard":"0008"},"000000067F00004005000060F700005C0000-000000067F00004005000060F700005C85CE__00000057593D8169-0000005C01565329":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000B04000-000000067F00004005016EA00C0000B40000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30002920FA0-000000067F00004005000060F3000293210E__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002058000-000000067F00004005000060F30002070F71__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000686D0DE-000000067F00004005000060F3000689E295__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FA2AD30000004000-000000067F0000400500FA2AD30000030000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00009BF728-000000067F00004005016EA00C0000A575C7__00000196C9018F59-0000019A2EAFE7A9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30004374000-000000067F00004005000060F300043B0000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300051F0000-000000067F00004005000060F300051F4000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006B22072-000000067F00004005000060F30006B4B0BB__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000328FA4E-000000067F00004005000060F50100000000__000000E4D847F4E0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000000FEA0-000000067F00004005016EA00C000001FD3E__0000018624969469-000001880F984A29":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500EB4A48000019F4DD-030000000000000000000000000000000002__000000F6661C9241-000000F901689359":{"file_size":59498496,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00003EC000-000000067F00004005016EA00C00003F8000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C000073C000-000000067F00004005016EA00C000074F43B__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003542BFF-000000067F00004005000060F50100000000__000000EFDE07FFD8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70001771169-000000067F00004005000060F80100000000__000001398B56A519-0000013C9C0E3339":{"file_size":263454720,"generation":2,"shard":"0008"},"000000067F000040050081DB4300003B27DA-030000000000000000000000000000000002__0000008DDCD70B68":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F3000542AFB0-000000067F00004005000060F30005474062__0000015304A396B9-0000015670D6AFD9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000057C94F-000000067F00004005000060F80100000000__00000054161C34B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300055861F2-000000067F00004005000060F30100000000__0000015304A396B9-0000015670D6AFD9":{"file_size":127393792,"generation":2,"shard":"0008"},"000000067F00004005000060F30001D79136-000000067F00004005000060F30100000000__0000008DBE2855F9-000000923719A971":{"file_size":227958784,"generation":2,"shard":"0008"},"000000067F00004005000060F10000218000-000000067F00004005000060F1000021C000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CD4000-000000067F00004005016EA00C0001CE0000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F300017EC000-000000067F00004005000060F30001886B2A__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001188000-000000067F00004005000060F300011D1111__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000ECC000-000000067F00004005000060FB0000F050F2__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300018C0000-000000067F00004005000060F300018E0FE6__00000075CC373F31-00000079F2A2F311":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00006E4000-000000067F00004005016EA00C0000738000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30002790000-000000067F00004005000060F30002794000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C00001B850B-000000067F0000400500F56D510100000000__0000011B688FEDC8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F100001F8000-000000067F00004005000060F100001FC000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000810000-000000067F00004005000060F80100000000__00000079F2A2F311-0000007E3A9BFD29":{"file_size":263454720,"generation":2,"shard":"0008"},"000000067F00004005000060F100006CBF87-000000067F00004005000060F20100000000__000000A5A3F27398":{"file_size":15851520,"generation":2,"shard":"0008"},"000000067F0000400500F7D2DD0100000000-000000067F0000400500F8E3A50000014000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700010AABC7-000000067F00004005000060F80100000000__000000EFDE07FFD8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30003B80000-000000067F00004005000060F30003B84000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000078000-000000067F000040050081DB4300000AA080__00000075CC373F31-00000079F2A2F311":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002618000-000000067F00004005000060F30002680F9D__000000B2B5C4E8F9-000000B768469051":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A48000-000000067F00004005000060F30002A4C000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001994000-000000067F00004005000060F700019E8000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000B6168A-000000067F00004005000060FB0000B6A1D0__0000003579F03331-0000003959DA2DE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000147A0EC-000000067F00004005000060FB000148AC30__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC0000060000-000000067F0000400500EE16BC0000064000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003458D42-000000067F00004005000060F30003481DDB__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006E30000-000000067F00004005000060F30006E34000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700017F8000-000000067F00004005000060F700017FC000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C50000-000000067F00004005000060F30004C54000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001720000-000000067F00004005000060F80100000000__00000139CF156B58":{"file_size":63463424,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A8E15E-000000067F000040050081DB430000A98000__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":265404416,"generation":2,"shard":"0008"},"000000067F00004005000060F30004BAE526-000000067F00004005000060F30004BE7584__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001ADF97B-000000067F00004005016EA00C0001B0FD2A__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F60000014000-000000067F00004005000060F60100000000__0000003D2AB09B68":{"file_size":83329024,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C1C000-000000067F00004005000060FB0000C70000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005240000-000000067F00004005000060F30005244000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000077C000-000000067F000040050081DB430000790000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006D60000-000000067F00004005000060F30006D64000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C54000-000000067F00004005000060F30004C60000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000000000000000001-000000067F0000400500000A690000000002__00000186146441F1-0000018624969469":{"file_size":57344,"generation":6,"shard":"0008"},"000000067F00004005000060F30005688000-000000067F00004005000060F3000568C000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004370000-000000067F00004005000060F30004374000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300051F4000-000000067F00004005000060F30005210000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004DD8000-000000067F00004005000060F30004DDC000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500C782E400001AFD31-000000067F0000400500C782E400001B7C41__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000BB103B-000000067F00004005000060F60000014C3A__0000003579F03331-0000003959DA2DE9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F0000400500D19D030100000000-000000067F0000400500D69D790000024000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000028B253-030000000000000000000000000000000002__0000008196C976A1-0000008625CF2891":{"file_size":151224320,"generation":2,"shard":"0008"},"000000067F00004005000060F30004DD8000-000000067F00004005000060F30004E40FFC__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005010F44EB0100000000-000000067F00004005010F57CB000000C000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003BCC000-000000067F00004005000060F30003C08000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005B80000-000000067F00004005000060F30005B89170__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000135FCAD-000000067F00004005016EA00C000144FB4E__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005010660F500000B0000-000000067F00004005010660F500000B4000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000D31030-000000067F00004005000060F30100000000__0000003D03FCCDB9-000000417D21ACF9":{"file_size":233791488,"generation":2,"shard":"0008"},"000000067F00004005000060F30002C18FAE-000000067F00004005000060F30002C71F27__000000C824C09619-000000CC13D2E549":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000041FB53-000000067F0000400500EB4A480000447A64__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC0000048000-000000067F0000400500EE16BC000004C000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00009D0000-000000067F00004005000060FB00009D4000__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100004365FE-000000067F00004005000060F20100000000__00000047F1F2B800":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30006BAD108-000000067F00004005000060F30006C0E146__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300006B4000-000000067F00004005000060F300006E0000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000327C000-000000067F00004005000060F3000328FA4E__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003B94000-000000067F00004005000060F30003BC8000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003CB8FCF-000000067F00004005000060F30003CCA0B9__00000117EDA82C11-0000011B632CC319":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003EA902F-000000067F00004005000060F30003F72201__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C64000-000000067F00004005000060F30004C80000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000194000-000000067F000040050081DB4300001C8000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB01FFFFFFFF-000000067F00004005000060FB0300000000__0000018613A0DEA9-00000186146441F1":{"file_size":73728,"generation":5,"shard":"0008"},"000000067F00004005000060F300038B5F5B-000000067F00004005000060F300038FF04F__0000010779A7F551-0000010A5E65DF39":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001C8000-000000067F000040050081DB4300001CC000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500C782E40000137F10-000000067F0000400500C782E40000177E20__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000139C000-000000067F00004005000060FB00013B8000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000447A64-000000067F0000400500EB4A480100000000__000000FCCD5238B1-000000FF8B261599":{"file_size":40550400,"generation":2,"shard":"0008"},"000000067F00004005000060F70000418000-000000067F00004005000060F700004405CF__0000003D03FCCDB9-000000417D21ACF9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F000040050081DB430000728000-000000067F000040050081DB43000072C000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300014B0F7B-000000067F00004005000060F30100000000__000000601F43CF09-000000636DE92159":{"file_size":83951616,"generation":2,"shard":"0008"},"000000067F00004005000060F30005F3303F-000000067F00004005000060F30005FA40AD__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300012442A9-000000067F00004005000060F3000129D29A__00000057593D8169-0000005C01565329":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010B14AB-000000067F000040050081DB430100000000__000000D037B2DBD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00014CF88D-000000067F00004005016EA00C00014D7727__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30006A0B44C-000000067F00004005000060F30006A7C566__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F1000062EE46-000000067F00004005000060F20100000000__000000698F2C3A38":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CE0000-000000067F00004005016EA00C0001CE4000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F30000250000-000000067F00004005000060F30000254000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300050E8000-000000067F00004005000060F300050EC000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000259F4A3-000000067F00004005000060F30100000000__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":44433408,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A640EA-000000067F000040050081DB430000A8E15E__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003050000-000000067F00004005000060F30003061089__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C0000158000-000000067F0000400500F3A25C000016A065__0000010779A7F551-0000010A5E65DF39":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010A4000-000000067F000040050081DB4300010B14AB__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00001E0000-000000067F0000400500EE16BC00001E4000__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300055B8000-000000067F00004005000060F300055BC000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000CE4000-000000067F00004005016EA00C0000D30000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003640000-000000067F00004005000060F30003644000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000014F7AC-000000067F0000400500EB4A4800001876BD__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CD338E-000000067F00004005016EA00C0001CE79E0__000001BA93C39481-000001BCB572A4E1":{"file_size":268451840,"generation":17,"shard":"0008"},"000000067F00004005000060FB0001530B44-000000067F00004005000060FB0001541688__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300031D516C-000000067F00004005000060F30100000000__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":137863168,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00019C7A6A-000000067F00004005016EA00C00019F7907__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000E7F7A7-000000067F00004005016EA00C0000F3F647__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F300032C0000-000000067F00004005000060F300032F1113__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00006E0000-000000067F00004005016EA00C00006E4000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F7000019EA78-000000067F00004005000060F80100000000__0000001737D88379-0000001B59EEB909":{"file_size":50946048,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001B4FBC9-000000067F00004005016EA00C0001BBFA66__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001660000-000000067F00004005000060FB0001680B45__000000698AF6E809-0000006DDB29D589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002BAA1DD-000000067F00004005000060F30100000000__000000C462B3C2A9-000000C824C09619":{"file_size":203554816,"generation":2,"shard":"0008"},"000000067F00004005000060F300049B26A8-000000067F00004005000060F300049CB712__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000CCB5CD-000000067F00004005000060F70000CDBB9C__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000EEA075-000000067F000040050081DB430000F0C0E9__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300003E0000-000000067F00004005000060F300003E8FBC__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006C9C000-000000067F00004005000060F30006CA0000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000C7C000-000000067F00004005000060F70000C8CD0C__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001148000-000000067F00004005000060FB000114C000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001232ACF-000000067F00004005000060F80100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000FE8000-000000067F00004005000060F700010105DB__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000355928-000000067F0000400500EB4A480100000000__000000FCD84FE628":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700003FE341-000000067F00004005000060F80100000000__0000003D2AB09B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000244D189-000000067F00004005000060F30100000000__000000A9EB8C4489-000000ACA44C8E99":{"file_size":212566016,"generation":2,"shard":"0008"},"000000067F00004005000060F700003B85C7-000000067F00004005000060F80100000000__0000003579F03331-0000003959DA2DE9":{"file_size":208945152,"generation":2,"shard":"0008"},"000000067F00004005000060F100005A2B80-000000067F00004005000060F20100000000__000000603CA8F2F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB000070C000-000000067F00004005000060FB0000718000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB01FFFFFFFF-000000067F00004005000060FB0300000000__00000186146441F1-0000018624969469":{"file_size":24576,"generation":6,"shard":"0008"},"000000067F00004005000060FB000180C000-000000067F00004005000060FB0001838000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC0000044000-000000067F0000400500EE16BC0000048000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10100000000-000000067F00004005000060F10300000000__000000A583FBFB91-000000A9EB8C4489":{"file_size":483328,"generation":2,"shard":"0008"},"000000067F00004005000060F30004EA41A5-000000067F00004005000060F30004EC52E9__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003AB9907-000000067F00004005000060F30003AF28CB__0000010FB1BE19B9-00000113456156F1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000974000-000000067F00004005000060FB00009D0000__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300038720A2-000000067F00004005000060F300038A3082__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000452BA1-000000067F000040050081DB4300004C4C1E__000000923719A971-00000096262826C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300017AA0CE-000000067F00004005000060F30100000000__0000006DDB29D589-000000722F474369":{"file_size":202719232,"generation":2,"shard":"0008"},"000000067F000040050081DB430000504000-000000067F000040050081DB430000560000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004B5431C-000000067F00004005000060F30004B654F6__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000C20000-000000067F00004005000060F30000C24000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300028920E4-000000067F00004005000060F30100000000__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":200351744,"generation":2,"shard":"0008"},"000000067F000040050081DB4300004C4C1E-030000000000000000000000000000000002__000000923719A971-00000096262826C9":{"file_size":192356352,"generation":2,"shard":"0008"},"000000067F000040050081DB430000190000-000000067F000040050081DB430000194000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000E88000-000000067F000040050081DB430000E8C000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000738000-000000067F00004005016EA00C000073C000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000578EE6-000000067F000040050081DB43000058AF5E__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001C38000-000000067F00004005000060F30001C3C000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000B7C0EA-030000000000000000000000000000000002__000000B2B5C4E8F9-000000B768469051":{"file_size":133464064,"generation":2,"shard":"0008"},"000000067F00004005000060F3000625B8F0-000000067F00004005000060F30100000000__0000016B49A934C1-0000016E1FBB7B99":{"file_size":139640832,"generation":2,"shard":"0008"},"000000067F00004005000060FB000109C000-000000067F00004005000060FB0001110000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572DFF9-000001BCB5730259":{"file_size":24576,"generation":41,"shard":"0008"},"000000067F00004005000060FB0000AA8000-000000067F00004005000060FB0000AD0B45__0000003203FB5749-0000003579F03331":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300043F8000-000000067F00004005000060F300043FC000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800003C7C42-000000067F0000400500EB4A48000041FB53__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005BA213F-000000067F00004005000060F30005BDB15B__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300063FE10E-000000067F00004005000060F30100000000__0000016E1FBB7B99-000001715E483C79":{"file_size":111067136,"generation":2,"shard":"0008"},"000000067F00004005000060F30000F91FFF-000000067F00004005000060F30000F9B026__00000047E31D98D1-0000004C49155071":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003650000-000000067F00004005000060F30003654000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300050A412B-000000067F00004005000060F300050B5199__0000014784964B91-0000014B000D1821":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D78000-000000067F00004005016EA00C0001D7C000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005016EA00C0001244000-000000067F00004005016EA00C0001298000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F100001FC000-000000067F00004005000060F10000200000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000CA0000-000000067F00004005016EA00C0000CA4000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F3000498DC49-000000067F00004005000060F50100000000__00000139CF156B58":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F60000036EA0-000000067F00004005000060F60100000000__0000009A24DF6768":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000928B45-000000067F00004005000060FB000097168A__00000028C365FBE1-0000002D2A8E0B81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006854000-000000067F00004005000060F30006858000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050109FFA2000000C3F5-030000000000000000000000000000000002__00000117EDA82C11-0000011B632CC319":{"file_size":226066432,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A6D1B3-000000067F00004005000060F30100000000__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":117620736,"generation":2,"shard":"0008"},"000000067F00004005000060F30002D2C000-000000067F00004005000060F30002D80000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A31FB6-000000067F00004005000060F30003A3B020__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000160723E-000000067F00004005016EA00C00016570D9__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500FB3D310000018000-000000067F0000400500FB3D31000001C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001708000-000000067F00004005000060F7000170C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000283C3E7-000000067F00004005000060F50100000000__000000BAC0041E18":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB00018F0000-000000067F00004005000060FB0100000000__00000075CC373F31-00000079F2A2F311":{"file_size":268959744,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000EC8000-000000067F00004005000060FB0000ECC000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F9C000-000000067F00004005016EA00C0000FF0000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30002680F9D-000000067F00004005000060F3000274A080__000000B2B5C4E8F9-000000B768469051":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000679C000-000000067F00004005000060F300067A0000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000428313F-000000067F00004005000060F300042CC1BD__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00000FFFFFFFF-030000000000000000000000000000000002__00000186146441F1-0000018624969469":{"file_size":24576,"generation":6,"shard":"0008"},"000000067F00004005000060FB00017D8000-000000067F00004005000060FB00017DC000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700017FC000-000000067F00004005000060F70001828000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002FD317C-000000067F00004005000060F30002FF427D__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001701588-000000067F00004005000060FB00017120CE__0000006DDB29D589-000000722F474369":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500000A3000FFFFFFFF-000000067F0000400500000A690000000002__000001BA93C39481-000001BCB572A4E1":{"file_size":40960,"generation":17,"shard":"0008"},"000000067F00004005000060FB0000638B45-030000000000000000000000000000000002__0000001B59EEB909-0000001FFBC01501":{"file_size":252010496,"generation":2,"shard":"0008"},"000000067F000040050081DB430000394000-000000067F000040050081DB4300003A8000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CF0197-000000067F00004005000060F50100000000__0000008DDCD70B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800000DFB51-000000067F0000400500EB4A4800000E7A62__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000014C000-000000067F00004005000060F70000180000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005948000-000000067F00004005000060F300059790CD__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000853115-000000067F00004005000060F60100000000__00000023FEF9F321-00000028C365FBE1":{"file_size":176136192,"generation":2,"shard":"0008"},"000000067F00004005000060F30004884000-000000067F00004005000060F30004888000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000513C000-000000067F00004005000060F30005160000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C000017C000-000000067F0000400500F3A25C00001B850B__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006382F14-000000067F00004005000060F3000638C06D__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A10000017F02-000000067F0000400500E3A2A100000B7E04__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001000B44-000000067F00004005000060FB0001009688__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D69D790100000000-000000067F0000400500DBCED50000024000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010A0000-000000067F000040050081DB4300010A4000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000310000-000000067F00004005000060FB0000348B45__0000000D55A212C9-000000114A805939":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F60000060038-000000067F00004005000060F60100000000__000000F91FE84F08":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CE0000-000000067F00004005000060F30001CE4000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300000AA080-000000067F000040050081DB4300000D40FF__00000075CC373F31-00000079F2A2F311":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000551689-030000000000000000000000000000000002__0000001737D88379-0000001B59EEB909":{"file_size":227418112,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000D90000-000000067F00004005000060FB0100000000__0000003D03FCCDB9-000000417D21ACF9":{"file_size":272769024,"generation":2,"shard":"0008"},"000000067F00004005000060F300059CC403-000000067F00004005000060F300059F53C6__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F2C000-000000067F00004005000060F30001F30000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000014000-000000067F00004005000060FB0000084772__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005000060F30004B654F6-000000067F00004005000060F30004BAE526__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002450000-000000067F00004005000060F30002454000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A0F066-000000067F00004005000060F50100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F60000032EBE-000000067F00004005000060F60100000000__0000008DDCD70B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB00001D8000-000000067F00004005000060FB00001DC000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000670000-000000067F00004005016EA00C0000674000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0001344000-000000067F00004005016EA00C0001358000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000D30000-000000067F00004005016EA00C0000D34000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C000012FE9A-000000067F00004005016EA00C00001F7D38__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F70000BF0000-000000067F00004005000060F70100000000__000000B2B5C4E8F9-000000B768469051":{"file_size":273809408,"generation":2,"shard":"0008"},"000000067F00004005000060F300005A0000-000000067F00004005000060F3000067114B__0000001B59EEB909-0000001FFBC01501":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000021C000-000000067F0000400500EB4A480000290000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F3C000-000000067F00004005016EA00C0000F58000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C000074F43B-030000000000000000000000000000000002__000001936E73D028":{"file_size":139264,"generation":11,"shard":"0008"},"000000067F00004005010F57CB000000C000-000000067F00004005010F99A50100000000__00000126C3C69FC0":{"file_size":22978560,"generation":2,"shard":"0008"},"000000067F00004005000060F700017E1391-000000067F00004005000060F80100000000__0000013C9C0E3339-0000013FEFA7D709":{"file_size":232677376,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CC74D7-000000067F00004005016EA00C0001CD7376__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F700005C85CE-000000067F00004005000060F700005E8B9D__00000057593D8169-0000005C01565329":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FCD352-000000067F00004005000060F30100000000__0000011F1A40FA69-00000122A7BB7B29":{"file_size":124788736,"generation":2,"shard":"0008"},"000000067F0000400500C782E400002A5E4B-000000067F0000400500C782E400002CDD5C__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700018871D6-000000067F00004005000060F80100000000__000001444EB7FC10":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30003D252C8-000000067F00004005000060F30100000000__00000117EDA82C11-0000011B632CC319":{"file_size":205963264,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001408A62-000000067F00004005000060FB00014195A7__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500C782E400001B7C41-000000067F0000400500C782E400001C7B51__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000110000-000000067F00004005000060FB0100000000__000000044854EBD1-00000008B6B51879":{"file_size":272613376,"generation":2,"shard":"0008"},"000000067F00004005000060F300004E8000-000000067F00004005000060F60100000000__0000001737D88379-0000001B59EEB909":{"file_size":260579328,"generation":2,"shard":"0008"},"000000067F00004005000060F30006DF4000-000000067F00004005000060F30006E30000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C84000-030000000000000000000000000000000002__000000BAC0041E18":{"file_size":59998208,"generation":2,"shard":"0008"},"000000067F00004005000060F30002B88FF2-000000067F00004005000060F30002BAA1DD__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000434000-000000067F00004005000060FB00004A0000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004DA8000-000000067F00004005000060F30004DAC000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300004E0000-000000067F000040050081DB4300004E4000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00001E4000-000000067F0000400500EE16BC0000201716__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C440EA-000000067F000040050081DB430000C5E15B__000000B768469051-000000BAB1E56C91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000BDAF5-000000067F0000400500D69D790100000000__000000EFDE07FFD8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A9C000-000000067F00004005000060F30002AEED02__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004DAC000-000000067F00004005000060F30004DD8000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B94000-000000067F00004005000060F70000B98000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002454000-000000067F00004005000060F30002460000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100001059CB-000000067F00004005000060F10000125BF2__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000D362CA-000000067F00004005016EA00C0000DB7D33__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30001C0A0A3-000000067F00004005000060F30100000000__0000008625CF2891-00000089F4693119":{"file_size":203063296,"generation":2,"shard":"0008"},"000000067F00004005000060F300066F0000-000000067F00004005000060F300066F4000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001414000-000000067F00004005000060F70001428000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300014CC16D-000000067F00004005000060F300014D5280__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000172AC12-030000000000000000000000000000000002__0000006DDB29D589-000000722F474369":{"file_size":186875904,"generation":2,"shard":"0008"},"000000067F000040050081DB430000E4C000-000000067F000040050081DB430000E88000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300063A50CD-000000067F00004005000060F300063FE10E__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005419E9C-000000067F00004005000060F3000542AFB0__0000015304A396B9-0000015670D6AFD9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC000014158C-030000000000000000000000000000000002__000000F901689359-000000FCCD5238B1":{"file_size":67854336,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00015FF3A0-000000067F00004005016EA00C000160723E__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005016EA00C00008E760F-000000067F00004005016EA00C00009274AB__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F70000B98000-000000067F00004005000060F70000B9C000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00004A4000-000000067F00004005000060FB00004E1FF6__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006670000-000000067F00004005000060F30006674000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000185EE9-000000067F00004005000060F7000018E4B6__0000001737D88379-0000001B59EEB909":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D19D030000067CA9-030000000000000000000000000000000002__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":29319168,"generation":2,"shard":"0008"},"000000067F0000400500FF2A51000000BFFB-030000000000000000000000000000000002__0000010D77B487A0":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F30004A048A8-000000067F00004005000060F30004A1D870__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300004BC000-000000067F00004005000060F300004C6B83__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005290FC9-000000067F00004005000060F3000533205E__0000014EC58A4A79-0000015304A396B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300031130BC-000000067F00004005000060F300031C40D1__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D19D030000047EE2-000000067F0000400500D19D03000004FDC6__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A44000-000000067F00004005000060F30002A48000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003DAE2DC-000000067F00004005000060F30003DD734C__0000011B632CC319-0000011F1A40FA69":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F8E3A50000014000-000000067F0000400500F8E3A5000004A25C__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100002F03E9-000000067F00004005000060F20100000000__000000321AA80270":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70001138000-000000067F00004005000060F80100000000__000000FCCD5238B1-000000FF8B261599":{"file_size":72695808,"generation":2,"shard":"0008"},"000000067F00004005000060F300056E4000-000000067F00004005000060F50100000000__00000159B010F6C0":{"file_size":13393920,"generation":2,"shard":"0008"},"000000067F00004005000060F70000A7C000-000000067F00004005000060F70000ABD9C4__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000CC6E51-030000000000000000000000000000000002__0000003D2AB09B68":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F60000091EFF-000000067F00004005000060F60100000000__0000014EDD256548":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000008FC41-000000067F0000400500EB4A4800000DFB51__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F363B4-000000067F00004005000060F30001F574A6__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001CD0000-000000067F00004005016EA00C0001CD4000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F300059B324D-000000067F00004005000060F300059CC403__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002530000-000000067F00004005000060F30002534000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000004B633-000000067F00004005000060F60100000000__000000C483D0D6B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700011E0000-000000067F00004005000060F80100000000__0000010779A7F551-0000010A5E65DF39":{"file_size":262922240,"generation":2,"shard":"0008"},"000000067F00004005000060F30006690000-000000067F00004005000060F30006694000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000100E18-000000067F00004005000060F700001213F2__0000000D55A212C9-000000114A805939":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F0000400500FF2A510000004000-000000067F0000400500FF2A51000000BFFB__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000EB8000-000000067F00004005000060FB0000EBC000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000674000-000000067F00004005016EA00C00006B0000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70000EF85D6-000000067F00004005000060F80100000000__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":262897664,"generation":2,"shard":"0008"},"000000067F00004005000060F700005E8B9D-000000067F00004005000060F700005F9158__00000057593D8169-0000005C01565329":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30004E40FFC-000000067F00004005000060F30004E7A062__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000037E20-000000067F0000400500EB4A480000057D31__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400501101C0901FFFFFFFF-030000000000000000000000000000000002__0000012E71CF31F9-000001334140FC21":{"file_size":65060864,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B10000-000000067F00004005000060F70100000000__000000A583FBFB91-000000A9EB8C4489":{"file_size":272646144,"generation":2,"shard":"0008"},"000000067F00004005000060F300056E104B-000000067F00004005000060F3000570A19E__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300059790CD-000000067F00004005000060F300059AA115__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B54000-000000067F00004005000060F70000B90000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300041D9101-000000067F00004005000060F3000424A099__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700000E085E-000000067F00004005000060F70000100E18__0000000D55A212C9-000000114A805939":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300051B0000-000000067F00004005000060F300051B4000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572A4E1-000001BCB572C329":{"file_size":24576,"generation":17,"shard":"0008"},"000000067F00004005000060F30006D30000-000000067F00004005000060F30006D34000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FDA1F80000020D42-000000067F0000400500FDA1F80100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F000040050081D80C0100000000-000000067F000040050081DB430000024000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F600000235B4-000000067F00004005000060F60100000000__000000603CA8F2F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500C782E400000A0000-000000067F0000400500C782E400000A4000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002264247-000000067F00004005000060F50100000000__000000A5A3F27398":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000302C2D6-000000067F00004005000060F50100000000__000000DBD29DC248":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000129C000-000000067F00004005016EA00C0001340000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700016E8000-000000067F00004005000060F700016EC000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300023A0000-000000067F00004005000060F300023B0FF7__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F20100000000-000000067F00004005000060F3000000C000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005016EA00C0000374000-000000067F00004005016EA00C00003E0000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70000368000-000000067F00004005000060F80100000000__0000003203FB5749-0000003579F03331":{"file_size":263249920,"generation":2,"shard":"0008"},"000000067F000040050081DB4300006310C9-030000000000000000000000000000000002__0000009A1ABDE921-0000009DF02C1241":{"file_size":208953344,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000DC8000-000000067F00004005000060FB0000DE8B45__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000530000-000000067F00004005000060FB0000538B44__0000001737D88379-0000001B59EEB909":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000024000-000000067F000040050081DB430000028000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000488C000-000000067F00004005000060F30004898000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300044D3639-000000067F00004005000060F50100000000__0000012E77D3BF00":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005010450640000000570-000000067F0000400501046F39000000BDD2__0000010FB1BE19B9-00000113456156F1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300021050B0-000000067F00004005000060F3000212E160__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700010DD440-000000067F00004005000060F80100000000__000000F309FCDD19-000000F6661C9241":{"file_size":91758592,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000AD0B45-000000067F00004005000060FB0000AE168A__0000003203FB5749-0000003579F03331":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000013B18E-000000067F00004005000060F7000014B73D__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001938000-000000067F00004005016EA00C000193FE9D__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500C782E400000A4000-000000067F0000400500C782E4000012A71E__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001A40000-000000067F00004005000060F30001A44000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00008578D4-000000067F00004005016EA00C00008CF772__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30001CC0000-000000067F00004005000060F30001CC4000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004D20000-000000067F00004005000060F30004D24000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00003E8000-000000067F00004005016EA00C00003EC000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F300039C4000-000000067F00004005000060F300039F8000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005164000-000000067F00004005000060F300051B0000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300039F8000-000000067F00004005000060F300039FC000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010F46BD-000000067F000040050081DB430100000000__000000D31E48D7C9-000000D74E29AAD1":{"file_size":113999872,"generation":2,"shard":"0008"},"000000067F00004005000060F30002E630CF-000000067F00004005000060F30100000000__000000D31E48D7C9-000000D74E29AAD1":{"file_size":171999232,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000ACF305-000000067F00004005016EA00C0000ADF1AB__00000196C9018F59-0000019A2EAFE7A9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30006748000-000000067F00004005000060F3000674C000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003810000-000000067F00004005000060F50100000000__00000104BD37F348":{"file_size":11739136,"generation":2,"shard":"0008"},"000000067F00004005000060F1000021C000-000000067F00004005000060F20100000000__0000002427BD8BD0":{"file_size":132448256,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00017EC000-000000067F00004005016EA00C00018C0000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F7000025DA3C-000000067F00004005000060F80100000000__0000002427BD8BD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB00007F0000-000000067F00004005000060FB0000860B45__00000023FEF9F321-00000028C365FBE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FF0000-000000067F00004005000060F30003FF4000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000E0AD15-000000067F00004005000060FB0000E1B859__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005010ADFA80000004000-000000067F00004005010F2BD40100000000__00000126C3C69FC0":{"file_size":13369344,"generation":2,"shard":"0008"},"000000067F00004005000060F30004898000-000000067F00004005000060F3000489C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003D2B1B0-000000067F00004005000060F30003D44283__0000011B632CC319-0000011F1A40FA69":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000FF4000-000000067F00004005016EA00C0001188000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005010F99A50100000000-000000067F00004005010F9F120000004000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F34000-000000067F00004005000060F30001F38F48__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700018A0000-000000067F00004005000060F700018D85CA__000001440D3D0C69-0000014784964B91":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300029A526C-000000067F00004005000060F300029C623C__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00017DC000-000000067F00004005000060FB0001808000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED50000024000-000000067F0000400500DBCED50000028000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC0000201716-000000067F0000400500EE16C40100000000__0000012A77C1B0B0":{"file_size":32768,"generation":2,"shard":"0008"},"000000067F00004005000060F30006D10000-000000067F00004005000060F30006D14000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430001064000-000000067F000040050081DB4300010A0000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C01FFFFFFFF-000000067F0000400500F3A25C0300000000__0000011B632CC319-0000011F1A40FA69":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001340000-000000067F00004005000060F30001344000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003E98000-000000067F00004005000060F30003EA902F__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006C0E146-000000067F00004005000060F30006C8729E__0000017C9F5597E1-0000018022640391":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F600000166C4-000000067F00004005000060F60100000000__0000003D03FCCDB9-000000417D21ACF9":{"file_size":54165504,"generation":2,"shard":"0008"},"000000067F00004005000060F10000180000-000000067F00004005000060F1000018821A__0000001737D88379-0000001B59EEB909":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000193FE9D-000000067F00004005016EA00C0001967D34__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F000040050081DB43000076C000-000000067F000040050081DB430000778000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300050321C0-000000067F00004005000060F30005063187__0000014784964B91-0000014B000D1821":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000D4000-000000067F0000400500DBCED500000F0000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300004B8000-000000067F00004005000060F300004BC000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000022C000-000000067F00004005000060FB0000280000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000DF968A-000000067F00004005000060FB0000E021D0__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000228000-000000067F00004005000060FB000022C000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015D8000-000000067F00004005000060FB00015DC000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005B89170-000000067F00004005000060F30005BA213F__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300043B0000-000000067F00004005000060F300043B4000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300004F8000-000000067F000040050081DB4300004FC000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006860000-000000067F00004005000060F30006864000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000ADA0D0-000000067F00004005000060F30000B0300C__0000003203FB5749-0000003579F03331":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FF2A510000000000-000000067F000040050100D04D000004369C__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C00000BB439-030000000000000000000000000000000002__00000104BD37F348":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001C078FA-000000067F00004005016EA00C0001C0F79A__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F000040050081DB430000B4A075-000000067F000040050081DB430000B7C0EA__000000B2B5C4E8F9-000000B768469051":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000117C10C-000000067F00004005000060F50100000000__00000054161C34B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000E47BD2-000000067F00004005016EA00C0000E67A6E__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30005D23BB5-000000067F00004005000060F50100000000__00000164EA9EC9A8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000336D193-000000067F00004005000060F3000337DCF3__000000E4C63CFA21-000000E7C2F1B249":{"file_size":259473408,"generation":2,"shard":"0008"},"000000067F00004005000060F300001F0000-000000067F00004005000060F300001F4000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000084772-030000000000000000000000000000000002__000000027AF9D7D0":{"file_size":147456,"generation":1,"shard":"0008"},"000000067F00004005016EA00C0001CE79E0-000000067F00004005016EA00C0001D1F87B__000001BA93C39481-000001BCB572A4E1":{"file_size":268451840,"generation":17,"shard":"0008"},"000000067F0000400500EB4A4800FFFFFFFF-000000067F0000400500EB4A480100000000__000000FF8B261599-000001048B25A8E9":{"file_size":1318912,"generation":2,"shard":"0008"},"000000067F00004005000060F70000488000-000000067F00004005000060F7000048C000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000ADF1AB-000000067F00004005016EA00C0100000000__00000196C9018F59-0000019A2EAFE7A9":{"file_size":282132480,"generation":11,"shard":"0008"},"000000067F00004005000060FB000071C000-000000067F00004005000060FB0000793506__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006850000-000000067F00004005000060F30006854000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000390000-000000067F000040050081DB430000394000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000020C000-000000067F00004005000060F30000250000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001398000-000000067F00004005000060FB000139C000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003648000-000000067F00004005000060F3000364C000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500C782E400001C7B51-000000067F0000400500C782E4000023FA62__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001788000-000000067F00004005016EA00C000178C000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000C3A075-000000067F000040050081DB430000C440EA__000000B768469051-000000BAB1E56C91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300036FE561-000000067F00004005000060F300038075AF__000000FF8B261599-000001048B25A8E9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D19D03000004FDC6-000000067F0000400500D19D030000067CA9__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C00000-000000067F00004005000060FB0000C04000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000282C000-000000067F00004005000060F3000283C3E7__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00006B0000-000000067F00004005016EA00C00006B4000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30001789027-000000067F00004005000060F300017AA0CE__0000006DDB29D589-000000722F474369":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004558000-000000067F00004005000060F300045C1062__0000012E71CF31F9-000001334140FC21":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C08000-000000067F00004005000060FB0000C0C000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006DCC000-000000067F00004005000060F30006DF0000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004B221FE-000000067F00004005000060F30004B2B250__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00018C4000-000000067F00004005016EA00C00018E0000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000564000-000000067F000040050081DB430000578000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000274A080-000000067F00004005000060F30100000000__000000B2B5C4E8F9-000000B768469051":{"file_size":199057408,"generation":2,"shard":"0008"},"000000067F00004005000060F300046D0EA8-000000067F00004005000060F3000471200E__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001114000-000000067F00004005000060FB0001120000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FEC000-000000067F00004005000060F30003FF0000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000368000-000000067F00004005000060F10100000000__0000003959DA2DE9-0000003D03FCCDB9":{"file_size":269967360,"generation":2,"shard":"0008"},"000000067F0000400500C782E4000012A71E-030000000000000000000000000000000002__000000D037B2DBD0":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F30006C98000-000000067F00004005000060F30006C9C000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300055BC000-000000067F00004005000060F30005610000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000F050F2-030000000000000000000000000000000002__00000047F1F2B800":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30002484000-000000067F00004005000060F300024D8000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FE8000-000000067F00004005000060F30003FEC000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000A8000-000000067F0000400500DBCED500000AC000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700006C3D76-000000067F00004005000060F80100000000__000000663565F8C9-000000698AF6E809":{"file_size":139821056,"generation":2,"shard":"0008"},"000000067F00004005000060F30002534000-000000067F00004005000060F3000253B7A3__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000412D27C-000000067F00004005000060F30004156457__00000122A7BB7B29-0000012694E36301":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000910000-000000067F00004005000060F700009385D4__0000008DBE2855F9-000000923719A971":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30002510000-000000067F00004005000060F30002514000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002210000-000000067F00004005000060F30002214000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FF4000-000000067F00004005000060F30004070000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001BBFA66-000000067F00004005016EA00C0001C078FA__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F3000424A099-000000067F00004005000060F3000428313F__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300036F91FE-000000067F00004005000060F30100000000__000000FCCD5238B1-000000FF8B261599":{"file_size":164118528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000718000-000000067F00004005000060FB000071C000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005010F44EB000000C000-000000067F00004005010F44EB0100000000__00000126C3C69FC0":{"file_size":70696960,"generation":2,"shard":"0008"},"000000067F00004005000060F30005214000-000000067F00004005000060F30005240000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000A7AF6E-030000000000000000000000000000000002__000000321AA80270":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30005063187-000000067F00004005000060F300050A412B__0000014784964B91-0000014B000D1821":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100005E8000-000000067F00004005000060F100005F821C__000000636DE92159-000000663565F8C9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300020830BE-000000067F00004005000060F300020FC052__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300065BB235-000000067F00004005000060F300065F42B4__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FA2AD30000034000-000000067F0000400500FA2AD3000004D85C__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00017A8000-000000067F00004005016EA00C00017AC000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB00008D8000-000000067F00004005000060FB0000928B45__00000028C365FBE1-0000002D2A8E0B81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000798000-000000067F00004005000060F300007C1007__00000023FEF9F321-00000028C365FBE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D19D030000040000-000000067F0000400500D19D030000047EE2__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30001AB1583-000000067F00004005000060F50100000000__00000081AA3C40F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001AD8000-000000067F00004005000060F30001B09104__0000008196C976A1-0000008625CF2891":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000E1B859-030000000000000000000000000000000002__000000417D21ACF9-00000044B4679349":{"file_size":156844032,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001E9C000-000000067F00004005000060FB0001EA8000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001374000-000000067F00004005000060FB0001398000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000155C000-000000067F00004005000060FB0001590000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C00000EA069-000000067F0000400500F3A25C000010C0D1__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000568C000-000000067F00004005000060F30005698000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C74000-000000067F00004005000060FB0000C98000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700004F0000-000000067F00004005000060F80100000000__00000047E31D98D1-0000004C49155071":{"file_size":264921088,"generation":2,"shard":"0008"},"000000067F00004005000060F30005598000-000000067F00004005000060F3000559C000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001429534-000000067F00004005000060F80100000000__00000122A7BB7B29-0000012694E36301":{"file_size":231964672,"generation":2,"shard":"0008"},"000000067F00004005000060F70000780000-000000067F00004005000060F80100000000__000000722F474369-00000075CC373F31":{"file_size":263340032,"generation":2,"shard":"0008"},"000000067F00004005000060F300019F31AA-000000067F00004005000060F30100000000__00000079F2A2F311-0000007E3A9BFD29":{"file_size":168484864,"generation":2,"shard":"0008"},"000000067F000040050081DB430000822079-000000067F000040050081DB43000082C0F1__000000A583FBFB91-000000A9EB8C4489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300007AC000-000000067F000040050081DB4300007F913A__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005847319-000000067F00004005000060F300058C8000__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":261505024,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001E21687-000000067F00004005000060FB0100000000__000000923719A971-00000096262826C9":{"file_size":224403456,"generation":2,"shard":"0008"},"000000067F00004005000060F30003C98000-000000067F00004005000060F30003CB8FCF__00000117EDA82C11-0000011B632CC319":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB43000045029C-030000000000000000000000000000000002__0000008DBE2855F9-000000923719A971":{"file_size":89505792,"generation":2,"shard":"0008"},"000000067F00004005000060F3000559C000-000000067F00004005000060F300055B8000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000285901B-000000067F00004005000060F300028920E4__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E64000-000000067F00004005000060F30000E70000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300015FB022-000000067F00004005000060F3000160410C__000000698AF6E809-0000006DDB29D589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006FDA081-000000067F00004005000060F30100000000__00000184624E5741-000001860C80A151":{"file_size":202276864,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000107973-000000067F0000400500EE16BC0100000000__000000F309FCDD19-000000F6661C9241":{"file_size":275456000,"generation":2,"shard":"0008"},"000000067F00004005000060F300031C40D1-000000067F00004005000060F300031D516C__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00001F7D38-000000067F00004005016EA00C000020FBCF__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500FDA1F80100000000-000000067F0000400500FF2A510000004000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001182EC9-000000067F00004005000060F80100000000__000000FF8B261599-000001048B25A8E9":{"file_size":174284800,"generation":2,"shard":"0008"},"000000067F00004005000060F700011528FB-000000067F00004005000060F70001182EC9__000000FF8B261599-000001048B25A8E9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300024DC000-000000067F00004005000060F30002510000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00000B0000-030000000000000000000000000000000002__000000021DC73119-000000044854EBD1":{"file_size":259375104,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001DF0B43-000000067F00004005000060FB0001E21687__000000923719A971-00000096262826C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000088000-000000067F00004005000060F10000090000__00000008B6B51879-0000000D55A212C9":{"file_size":264142848,"generation":2,"shard":"0008"},"000000067F00004005000060F30003968000-000000067F00004005000060F3000396C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00017AC000-000000067F00004005016EA00C00017E8000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F1000019C73D-000000067F00004005000060F20100000000__0000001B59EEB909-0000001FFBC01501":{"file_size":124698624,"generation":2,"shard":"0008"},"000000067F00004005000060F700001F8000-000000067F00004005000060F700002005D2__0000001B59EEB909-0000001FFBC01501":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001110000-000000067F00004005000060FB0001114000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F1000019842A-000000067F00004005000060F20100000000__0000001737D88379-0000001B59EEB909":{"file_size":145137664,"generation":2,"shard":"0008"},"000000067F00004005000060F700003BC000-000000067F00004005000060F700003C0000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000280000-000000067F00004005000060FB0000284000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED5000007C000-000000067F0000400500DBCED500000A8000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB5732691-000001BCB5734CD9":{"file_size":24576,"generation":239,"shard":"0008"},"000000067F00004005010660F70100000000-000000067F000040050107B547000006C000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000C24000-000000067F00004005000060F30000CA0000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000569C000-000000067F00004005000060F300056D8000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00000C7A73-030000000000000000000000000000000002__0000018624969469-000001880F984A29":{"file_size":40566784,"generation":11,"shard":"0008"},"000000067F00004005000060F30001344000-000000067F00004005000060F30001358000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F38F48-000000067F00004005000060F50100000000__0000009A24DF6768":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001760000-000000067F00004005000060F30001789027__0000006DDB29D589-000000722F474369":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F1000018821A-000000067F00004005000060F1000019842A__0000001737D88379-0000001B59EEB909":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300059AA115-000000067F00004005000060F300059B324D__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001400000-000000067F00004005000060FB0001404000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800000E7A62-000000067F0000400500EB4A480000107973__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000498000-000000067F00004005000060F3000049C000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D24000-000000067F00004005000060F70000D38000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000120E409-000000067F000040050081DB430300000000__0000018613F0A050":{"file_size":24576,"generation":3,"shard":"0008"},"000000067F00004005000060FB0001A8A1CD-000000067F00004005000060FB0100000000__0000007E3A9BFD29-0000008196C976A1":{"file_size":199622656,"generation":2,"shard":"0008"},"000000067F00004005000060F30006270000-000000067F00004005000060F50100000000__0000016E41E03CA0":{"file_size":71114752,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000BAAD15-030000000000000000000000000000000002__0000003579F03331-0000003959DA2DE9":{"file_size":182321152,"generation":2,"shard":"0008"},"000000067F00004005000060F700016205B5-000000067F00004005000060F80100000000__0000012E71CF31F9-000001334140FC21":{"file_size":266862592,"generation":2,"shard":"0008"},"000000067F00004005000060F300030C0FE5-000000067F00004005000060F30003102107__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004BC000-000000067F00004005016EA00C00004E8000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F10000440000-000000067F00004005000060F1000046821B__00000047E31D98D1-0000004C49155071":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F000040050081DB4300009C8000-000000067F000040050081DB4300009CC000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000106C000-000000067F00004005000060F700010AABC7__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000367733F-000000067F00004005000060F50100000000__000000F91FE84F08":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000478000-000000067F00004005016EA00C000047C000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30002E4104A-000000067F00004005000060F30002E4A157__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001370000-000000067F00004005000060FB0001374000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004B1111A-000000067F00004005000060F30004B221FE__0000013C9C0E3339-0000013FEFA7D709":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000029C000-000000067F00004005016EA00C00002D0000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F30001C3C000-000000067F00004005000060F30001CC0000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000136C000-000000067F00004005000060FB0001370000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000488000-000000067F00004005000060F10100000000__0000004C49155071-0000004F31878919":{"file_size":268754944,"generation":2,"shard":"0008"},"000000067F00004005000060F30000B0300C-000000067F00004005000060F60100000000__0000003203FB5749-0000003579F03331":{"file_size":212885504,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001C0F79A-000000067F00004005016EA00C0001C3F636__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F3000399C000-000000067F00004005000060F300039A0000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001574000-000000067F00004005000060F700015A195C__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005B00697-000000067F00004005000060F30100000000__0000015DD1D3C809-0000016143292911":{"file_size":282025984,"generation":2,"shard":"0008"},"000000067F00004005000060F300050C8000-000000067F00004005000060F300050CC000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700000885C5-000000067F00004005000060F80100000000__000000044854EBD1-00000008B6B51879":{"file_size":253878272,"generation":2,"shard":"0008"},"000000067F00004005000060F30001407F7A-000000067F00004005000060F50100000000__000000603CA8F2F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B90000-000000067F00004005000060F70000B94000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000560000-000000067F000040050081DB430000564000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001720000-000000067F00004005000060F700017405D4__000001398B56A519-0000013C9C0E3339":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300043CC000-000000067F00004005000060F300043F8000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000129D29A-000000067F00004005000060F30100000000__00000057593D8169-0000005C01565329":{"file_size":110788608,"generation":2,"shard":"0008"},"000000067F00004005000060F300003F9F83-000000067F00004005000060F30000402F4A__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001940000-000000067F00004005000060F700019685CE__0000014784964B91-0000014B000D1821":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300043B8000-000000067F00004005000060F300043BC000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000370FD1-000000067F00004005000060F60100000000__0000000D55A212C9-000000114A805939":{"file_size":232144896,"generation":2,"shard":"0008"},"000000067F00004005000060F30003849093-000000067F00004005000060F300038720A2__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100003C0432-000000067F00004005000060F20100000000__0000003D03FCCDB9-000000417D21ACF9":{"file_size":262701056,"generation":2,"shard":"0008"},"000000067F00004005000060F700014F85DF-000000067F00004005000060F70001510BBE__0000012694E36301-0000012A3F140591":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F3000253B7A3-000000067F00004005000060F50100000000__000000AFE87558B0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001404000-000000067F00004005000060FB0001408000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003F942CF-000000067F00004005000060F30003FCD352__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000B38000-000000067F00004005000060FB0000B58B45__0000003579F03331-0000003959DA2DE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B505C8-000000067F00004005000060F80100000000__000000A9EB8C4489-000000ACA44C8E99":{"file_size":226459648,"generation":2,"shard":"0008"},"000000067F00004005000060F3000612D506-000000067F00004005000060F30006166575__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700000DC000-000000067F00004005000060F700000E0000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FB3D31000000C000-000000067F0000400500FB3D310000018000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000000000000000000000000000000000-FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF__000001BCB572C329-000001BCB572C481":{"file_size":24576,"generation":19,"shard":"0008"},"000000067F00004005000060F30002828000-000000067F00004005000060F3000282C000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300015B0000-000000067F00004005000060F300015B4000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED50000078000-000000067F0000400500DBCED5000007C000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000086E169-030000000000000000000000000000000002__000000A583FBFB91-000000A9EB8C4489":{"file_size":77471744,"generation":2,"shard":"0008"},"000000067F0000400501046F39000000BDD2-000000067F00004005010660F500000161F7__0000010FB1BE19B9-00000113456156F1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FB3D3101FFFFFFFF-000000067F0000400500FB3D310300000000__00000122A7BB7B29-0000012694E36301":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00000F28ED-030000000000000000000000000000000002__000000F91FE84F08":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30004E9307A-000000067F00004005000060F30004EA41A5__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00016D21CF-030000000000000000000000000000000002__000000698AF6E809-0000006DDB29D589":{"file_size":226353152,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800001876BD-000000067F0000400500EB4A48000018F5CD__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500C782E400002E5B84-030000000000000000000000000000000002__000000DBD29DC248":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D8985C-000000067F00004005000060F70000DA1E38__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C28000-000000067F000040050081DB430000C3A075__000000B768469051-000000BAB1E56C91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000407201D-000000067F00004005000060F300040E319D__00000122A7BB7B29-0000012694E36301":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F6000002B3CE-000000067F00004005000060F60100000000__00000075E5D2A930":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D60000-000000067F00004005000060F80100000000__000000C483D0D6B8":{"file_size":133947392,"generation":2,"shard":"0008"},"000000067F00004005000060F70000F705D6-000000067F00004005000060F80100000000__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":259842048,"generation":2,"shard":"0008"},"000000067F00004005000060F30004E7A062-000000067F00004005000060F30004E9307A__000001440D3D0C69-0000014784964B91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006810000-000000067F00004005000060F30006814000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700007D05C8-000000067F00004005000060F80100000000__00000075CC373F31-00000079F2A2F311":{"file_size":251740160,"generation":2,"shard":"0008"},"000000067F00004005000000000000000001-000000067F0000400500000A690000000002__0000018624969469-000001880F984A29":{"file_size":40960,"generation":11,"shard":"0008"},"000000067F00004005000060FB00014D8000-000000067F00004005000060FB0001530B44__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001EA8000-000000067F00004005000060FB0001EAC000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000230A0C7-000000067F00004005000060F30100000000__000000A583FBFB91-000000A9EB8C4489":{"file_size":213680128,"generation":2,"shard":"0008"},"000000067F00004005000060F30000A98000-000000067F00004005000060F30000AC9024__0000003203FB5749-0000003579F03331":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003F72201-000000067F00004005000060F30003F7B254__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000498000-000000067F00004005016EA00C000049C000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30004CB8000-000000067F00004005000060F30004CBC000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300042CC1BD-000000067F00004005000060F300042D51D6__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FB3D310000028681-000000067F0000400500FB3D320100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000474302B-000000067F00004005000060F300047EC0CA__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003204000-000000067F00004005000060F30003278000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300024020ED-000000067F00004005000060F3000240B12A__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000216C000-000000067F00004005000060F30002170000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000005DD43-000000067F00004005000060F60100000000__000000EFDE07FFD8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000348B45-000000067F00004005000060FB000037968A__0000000D55A212C9-000000114A805939":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000778000-000000067F000040050081DB43000077C000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300011B4000-000000067F000040050081DB43000120E409__000000DBD29DC248":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003CCA0B9-000000067F00004005000060F30003D0B155__00000117EDA82C11-0000011B632CC319":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00009D4000-000000067F00004005000060FB0000A7AF6E__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700008F0000-000000067F00004005000060F80100000000__00000089F4693119-0000008DBE2855F9":{"file_size":262905856,"generation":2,"shard":"0008"},"000000067F00004005000060F30006CA0000-000000067F00004005000060F30006CA4000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000E021D0-000000067F00004005000060FB0000E0AD15__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003654000-000000067F00004005000060F3000367733F__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000DC0000-000000067F00004005000060F70000DE05C8__000000C824C09619-000000CC13D2E549":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F700018D85CA-000000067F00004005000060F80100000000__000001440D3D0C69-0000014784964B91":{"file_size":260775936,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000EAC000-000000067F00004005000060FB0000EB8000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E70000-000000067F00004005000060F30000E74000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005FE621A-000000067F00004005000060F30005FFF23F__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D20000-000000067F00004005000060F70000D24000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005244000-000000067F00004005000060F3000525C065__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400501025D9001FFFFFFFF-000000067F0000400501025D900300000000__0000011B632CC319-0000011F1A40FA69":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CD4000-000000067F00004005000060F30001CE0000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000E77906-000000067F00004005016EA00C0000E7F7A7__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F300046B41AA-000000067F00004005000060F30100000000__0000012E71CF31F9-000001334140FC21":{"file_size":199688192,"generation":2,"shard":"0008"},"000000067F000040050100D04D00000634BB-030000000000000000000000000000000002__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":173744128,"generation":2,"shard":"0008"},"000000067F00004005000060F30000CA4000-000000067F00004005000060F30000CB16B6__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004DDC000-000000067F00004005000060F30004DF086C__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005D7F2DE-000000067F00004005000060F30005DA03A8__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300048A0000-000000067F00004005000060F300048A4000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100003954D3-000000067F00004005000060F20100000000__0000003D2AB09B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300043BC000-000000067F00004005000060F300043C8000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D1C000-000000067F00004005016EA00C0001D78000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F100000D8000-000000067F00004005000060F100000E021B__0000000D55A212C9-000000114A805939":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F300060A0282-000000067F00004005000060F300060A93B5__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F1000021D8F8-000000067F00004005000060F20100000000__00000023FEF9F321-00000028C365FBE1":{"file_size":88227840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000018000-000000067F00004005000060F3000001C000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F000040050081DB430000E48000-000000067F000040050081DB430000E4C000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300003E8FBC-000000067F00004005000060F300003F9F83__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004868000-000000067F00004005000060F3000486C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700013D0000-000000067F00004005000060F700013E85D1__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001203856-030000000000000000000000000000000002__0000005413AB3641-00000057593D8169":{"file_size":157130752,"generation":2,"shard":"0008"},"000000067F00004005000060F3000029C000-000000067F00004005000060F300002C4887__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005160000-000000067F00004005000060F30005164000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FB3D31000001C000-000000067F0000400500FB3D310000028681__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000029F90B-000000067F00004005016EA00C00002D77AE__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30003620000-000000067F00004005000060F30100000000__000000F309FCDD19-000000F6661C9241":{"file_size":249372672,"generation":2,"shard":"0008"},"000000067F00004005000060F30003B90000-000000067F00004005000060F30003B94000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300001F4000-000000067F00004005000060F30000208000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001BB8000-000000067F00004005000060F30001C00FE1__0000008625CF2891-00000089F4693119":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005210000-000000067F00004005000060F30005214000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002070F71-000000067F00004005000060F30002079FDE__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000B40000-000000067F00004005000060F30000BB103B__0000003579F03331-0000003959DA2DE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000290000-000000067F00004005000060F10000298000__00000028C365FBE1-0000002D2A8E0B81":{"file_size":264134656,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00007C7B9C-000000067F00004005016EA00C0000807A34__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001548000-000000067F00004005000060FB000154C000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100005FC000-000000067F00004005000060F1000062EE46__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00001A0000-000000067F0000400500EE16BC00001A4000__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000F94000-000000067F00004005016EA00C0000F98000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70000290000-000000067F00004005000060F80100000000__00000023FEF9F321-00000028C365FBE1":{"file_size":265764864,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001BC0B44-000000067F00004005000060FB0001BD1689__0000008625CF2891-00000089F4693119":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000337DCF2-000000067F00004005000060F30003386D10__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300045C1062-000000067F00004005000060F3000460202F__0000012E71CF31F9-000001334140FC21":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006814000-000000067F00004005000060F30006850000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000073DFA8-000000067F00004005016EA00C000079FCFA__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005016EA00C000178C000-000000067F00004005016EA00C00017A8000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F1000051D1AE-000000067F00004005000060F20100000000__00000057593D8169-0000005C01565329":{"file_size":103145472,"generation":2,"shard":"0008"},"000000067F00004005000060F300034BD86C-000000067F00004005000060F30100000000__000000EBC9213D59-000000EFA7EAA9E1":{"file_size":95617024,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000008000-000000067F00004005016EA00C000000FEA0__0000018624969469-000001880F984A29":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F1000014C000-000000067F00004005000060F1000015F545__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FB3D300000000EAB-000000067F0000400500FB3D300100000000__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":12976128,"generation":2,"shard":"0008"},"000000067F000040050081DB430000028000-000000067F000040050081DB43000002C000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001BD1689-000000067F00004005000060FB0100000000__0000008625CF2891-00000089F4693119":{"file_size":223690752,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000000000-000000067F0000400500EB4A480000000001__000000FF8B261599-000001048B25A8E9":{"file_size":32768,"generation":2,"shard":"0008"},"000000067F00004005000060F30003D952B0-000000067F00004005000060F30003DAE2DC__0000011B632CC319-0000011F1A40FA69":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B30000-000000067F00004005000060F70000B505C8__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F3000549D0A6-000000067F00004005000060F300055861F2__0000015304A396B9-0000015670D6AFD9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F1000046821B-000000067F00004005000060F20100000000__00000047E31D98D1-0000004C49155071":{"file_size":266969088,"generation":2,"shard":"0008"},"000000067F00004005000060F300043C8000-000000067F00004005000060F300043CC000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001E720A2-000000067F00004005000060F30100000000__000000923719A971-00000096262826C9":{"file_size":141344768,"generation":2,"shard":"0008"},"000000067F000040050081DB4300003A8000-000000067F000040050081DB4300003AC000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700006AB7A6-000000067F00004005000060F700006C3D76__000000663565F8C9-000000698AF6E809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000570A19E-000000067F00004005000060F3000573B206__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003AF28CB-000000067F00004005000060F30003B33945__0000010FB1BE19B9-00000113456156F1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015CC000-000000067F00004005000060FB00015D8000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000A9CFB-000000067F0000400500D69D7900000D1C5F__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A30000-000000067F00004005000060F30002A34000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000047C000-000000067F00004005000060F30000498000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005FFF23F-000000067F00004005000060F300060A0282__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000029C194-000000067F00004005016EA00C00004EF809__0000018EC67807C9-000001935283F9B9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30006D64000-000000067F00004005000060F30006DC8000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001340000-000000067F00004005016EA00C0001344000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000BB0000-000000067F00004005016EA00C0000BB4000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F0000400500EB4A480000000000-000000067F0000400500EB4A480000007F0F__000000F309FCDD19-000000F6661C9241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A10000114000-000000067F0000400500E3A2A1000016321A__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000578000-030000000000000000000000000000000002__0000009A24DF6768":{"file_size":107642880,"generation":2,"shard":"0008"},"000000067F00004005000060F30006798000-000000067F00004005000060F3000679C000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100000E021B-000000067F00004005000060F1000010043F__0000000D55A212C9-000000114A805939":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F000040050081DB430000DA8000-030000000000000000000000000000000002__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":233201664,"generation":2,"shard":"0008"},"000000067F00004005000060F100004EC079-000000067F00004005000060F20100000000__00000054161C34B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F7000170C000-000000067F00004005000060F70001720000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000FCD85E-000000067F00004005000060F80100000000__000000E4D847F4E0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00015B74FF-000000067F00004005016EA00C00015FF3A0__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30000AC9024-000000067F00004005000060F30000ADA0D0__0000003203FB5749-0000003579F03331":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16C40100000000-000000067F0000400500F3A25C000006C000__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000F1B5B-000000067F0000400500D69D790100000000__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":233275392,"generation":2,"shard":"0008"},"000000067F00004005000060F30003C0C000-000000067F00004005000060F30003C257AD__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E44000-000000067F00004005000060F30000E60000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000018E4B6-000000067F00004005000060F7000019EA78__0000001737D88379-0000001B59EEB909":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00017E8000-000000067F00004005016EA00C00017EC000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003A4C09C-000000067F00004005000060F30003A6D1B3__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100000260F2-000000067F00004005000060F20100000000__000000027AF9D7D0":{"file_size":24576,"generation":1,"shard":"0008"},"000000067F00004005016EA00C0000097BDA-000000067F00004005016EA00C00000C7A73__0000018624969469-000001880F984A29":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500C782E400002CDD5C-030000000000000000000000000000000002__000000D31E48D7C9-000000D74E29AAD1":{"file_size":90923008,"generation":2,"shard":"0008"},"000000067F00004005000060F3000685C000-000000067F00004005000060F30006860000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001C84000-000000067F00004005000060FB0001CE16ED__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000CC4BC2-000000067F000040050081DB430000CD6C36__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006349DA2-000000067F00004005000060F30006382F14__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000212E160-000000067F00004005000060F30100000000__0000009DF02C1241-000000A173C00489":{"file_size":224731136,"generation":2,"shard":"0008"},"000000067F00004005000060F30001FF8691-000000067F00004005000060F30100000000__0000009A1ABDE921-0000009DF02C1241":{"file_size":256114688,"generation":2,"shard":"0008"},"000000067F00004005000060F300067F4000-000000067F00004005000060F30006810000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700015A8000-000000067F00004005000060F700016205B5__0000012E71CF31F9-000001334140FC21":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F0000400500D69D790000024000-000000067F0000400500D69D790000028000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700007AE010-000000067F00004005000060F80100000000__00000075E5D2A930":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000428000-000000067F00004005016EA00C000042C000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30001E74000-000000067F00004005000060F30001F28000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300038FF04F-000000067F00004005000060F30100000000__0000010779A7F551-0000010A5E65DF39":{"file_size":45359104,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001B0FD2A-000000067F00004005016EA00C0001B4FBC9__000001B6FFE46BC9-000001BA93C39481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30006858000-000000067F00004005000060F3000685C000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002F9A0EB-000000067F00004005000060F30002FD317C__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000808000-000000067F000040050081DB430000822079__000000A583FBFB91-000000A9EB8C4489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015DC000-000000067F00004005000060FB00015F0000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000021C000-000000067F00004005000060F7000025DA3C__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D79000007C000-000000067F0000400500D69D7900000A8000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000001EE3D-000000067F00004005000060F60100000000__00000054161C34B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F000040050081DB430000F4E15B-030000000000000000000000000000000002__000000C462B3C2A9-000000C824C09619":{"file_size":73662464,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F28000-000000067F00004005000060F30001F2C000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001F1DA6-030000000000000000000000000000000002__00000081AA3C40F0":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F70001758B92-000000067F00004005000060F70001771169__000001398B56A519-0000013C9C0E3339":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A10000010000-000000067F0000400500E3A2A10000017F02__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002A98000-000000067F00004005000060F30002A9C000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000573B206-000000067F00004005000060F300057942F4__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000860B45-030000000000000000000000000000000002__00000023FEF9F321-00000028C365FBE1":{"file_size":252788736,"generation":2,"shard":"0008"},"000000067F00004005000060F7000090B929-000000067F00004005000060F80100000000__0000008DDCD70B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F7000014B73D-000000067F00004005000060F80100000000__000000114A805939-00000013FB921C81":{"file_size":146432000,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D3C000-000000067F00004005000060F70000D60000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001514000-000000067F00004005000060F70001528000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001764000-000000067F00004005016EA00C0001788000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30001358000-000000067F00004005000060F3000135C000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001594000-000000067F00004005000060FB00015C8000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300004AC000-000000067F00004005000060F300004B8000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005610000-000000067F00004005000060F30005614000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002794000-000000067F00004005000060F300027C0000__000000BAC0041E18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C60000-000000067F00004005000060F30004C64000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700003A0000-000000067F00004005000060F700003B85C7__0000003579F03331-0000003959DA2DE9":{"file_size":268468224,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000F1034-030000000000000000000000000000000002__000000E4C63CFA21-000000E7C2F1B249":{"file_size":247480320,"generation":2,"shard":"0008"},"000000067F00004005000060F300051B4000-000000067F00004005000060F300051F0000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000003C77D-000000067F00004005000060F60100000000__000000A5A3F27398":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005010660F500000161F7-030000000000000000000000000000000002__0000010FB1BE19B9-00000113456156F1":{"file_size":64757760,"generation":2,"shard":"0008"},"000000067F00004005000060F30003F7B254-000000067F00004005000060F30003F942CF__0000011F1A40FA69-00000122A7BB7B29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004900000-000000067F00004005000060F30004904000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006F18000-000000067F00004005000060F30006F1C000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A21037-000000067F00004005000060F30003A31FB6__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000DB0000-000000067F00004005000060F30000E40F86__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001A60B43-000000067F00004005000060FB0001A71688__0000007E3A9BFD29-0000008196C976A1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006DC8000-000000067F00004005000060F30006DCC000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700006E38F6-000000067F00004005000060F80100000000__000000698F2C3A38":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000122B1C9-000000067F00004005000060F300012442A9__00000057593D8169-0000005C01565329":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000EA8000-000000067F00004005000060FB0000EAC000__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B5A072-000000067F00004005000060F80100000000__00000159B010F6C0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000144DCA3-000000067F00004005016EA00C000151F7C5__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F600000711FF-000000067F00004005000060F60100000000__00000122E1129DA0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300050EC000-000000067F00004005000060F30005138000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005260000-000000067F00004005000060F30005290FC9__0000014EC58A4A79-0000015304A396B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700012DE407-000000067F00004005000060F80100000000__000001180B3FF408":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000F10000-000000067F00004005000060F70000F185D4__000000DBBFA87AE1-000000DE2A8E4FC9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000D38000-000000067F00004005000060F70000D3C000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000006671F-000000067F00004005000060F60100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300059F53C6-000000067F00004005000060F30005A16504__0000015DD1D3C809-0000016143292911":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000B08000-000000067F000040050081DB430000B4A075__000000B2B5C4E8F9-000000B768469051":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000152C000-000000067F00004005000060F70001570000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000128000-000000067F00004005000060F3000012C000__0000018624969468":{"file_size":134422528,"generation":7,"shard":"0008"},"000000067F00004005000060F70000E24000-000000067F00004005000060F70000E387D6__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB4300002791D8-000000067F000040050081DB43000028B253__0000008196C976A1-0000008625CF2891":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F600000500F7-000000067F00004005000060F60100000000__000000D037B2DBD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000ABD9C4-000000067F00004005000060F80100000000__000000A5A3F27398":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F000040050081DB4300009CC000-000000067F000040050081DB430000A10000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700002005D2-000000067F00004005000060F80100000000__0000001B59EEB909-0000001FFBC01501":{"file_size":261169152,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001AA656E-000000067F000040050081D80C0100000000__00000081AA3C40F0":{"file_size":59138048,"generation":2,"shard":"0008"},"000000067F000040050081DB430000E14000-000000067F000040050081DB430000E48000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003DD734C-000000067F00004005000060F30003E40000__0000011B632CC319-0000011F1A40FA69":{"file_size":261046272,"generation":2,"shard":"0008"},"000000067F0000400500D19D0300FFFFFFFF-030000000000000000000000000000000002__000000DE2A8E4FC9-000000E1CD2FBBE9":{"file_size":5373952,"generation":2,"shard":"0008"},"000000067F00004005000060F30001588000-000000067F00004005000060F3000158C000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000AC000-000000067F0000400500DBCED500000D0000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EB4A48000013F89B-000000067F0000400500EB4A48000014F7AC__000000F6661C9241-000000F901689359":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300005D704F-000000067F000040050081DB4300006310C9__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A14000-000000067F000040050081DB430000A18000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001F574A6-000000067F00004005000060F30001FF8691__0000009A1ABDE921-0000009DF02C1241":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FB3D320100000000-000000067F0000400500FDA1F80000014000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001B09104-000000067F00004005000060F30001B4A119__0000008196C976A1-0000008625CF2891":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005011035750100000000-030000000000000000000000000000000002__00000159B010F6C0":{"file_size":78626816,"generation":2,"shard":"0008"},"000000067F00004005000060F1000015F545-000000067F00004005000060F20100000000__000000174479FC18":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000638C06D-000000067F00004005000060F300063A50CD__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000299C28F-000000067F00004005000060F300029A526C__000000BD9A7C56D9-000000C0C9EB88E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000364C000-000000067F00004005000060F30003650000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000CE0000-000000067F00004005016EA00C0000CE4000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB430000794000-000000067F000040050081DB4300007A8000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A18000-000000067F000040050081DB430000A1C000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000000C000-000000067F00004005000060F30000018000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F000040050081DB4300000D40FF-030000000000000000000000000000000002__00000075CC373F31-00000079F2A2F311":{"file_size":78061568,"generation":2,"shard":"0008"},"000000067F00004005000060F60000099FD8-000000067F00004005000060F60100000000__00000159B010F6C0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000330A1C8-000000067F00004005000060F3000332B1B6__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006FA900D-000000067F00004005000060F30006FDA081__00000184624E5741-000001860C80A151":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000148AC30-000000067F00004005000060FB000149B774__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C01FFFFFFFF-000000067F0000400500F3A25C0300000000__0000011F1A40FA69-00000122A7BB7B29":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30000EF1FC3-000000067F00004005000060F50100000000__00000047F1F2B800":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30006A7C566-000000067F00004005000060F30100000000__00000178B8B10551-0000017C9F5597E1":{"file_size":173072384,"generation":2,"shard":"0008"},"000000067F00004005000060FB000104B856-000000067F00004005000060FB000107C39B__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000030000-000000067F00004005000060F80100000000__000000021DC73119-000000044854EBD1":{"file_size":261341184,"generation":2,"shard":"0008"},"000000067F00004005000060F30003580FD3-000000067F00004005000060F30100000000__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":228188160,"generation":2,"shard":"0008"},"000000067F00004005000060F70001224000-000000067F00004005000060F70001232ACF__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300022B9050-000000067F00004005000060F3000230A0C7__000000A583FBFB91-000000A9EB8C4489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006654000-000000067F00004005000060F30006670000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700010D0000-000000067F00004005000060F700010D85CF__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000FD8000-030000000000000000000000000000000002__000000C824C09619-000000CC13D2E549":{"file_size":237559808,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015F0000-000000067F00004005000060FB00015F4000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F60100000000-000000067F00004005000060F70000004000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F00004005000060F70000DA1E38-000000067F00004005000060F80100000000__000000C462B3C2A9-000000C824C09619":{"file_size":209821696,"generation":2,"shard":"0008"},"000000067F00004005000060F30005D76250-000000067F00004005000060F30005D7F2DE__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000418000-000000067F00004005000060F10100000000__00000044B4679349-00000047E31D98D1":{"file_size":269148160,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B61000-000000067F00004005000060F80100000000__0000018613F0A050":{"file_size":65150976,"generation":3,"shard":"0008"},"000000067F00004005000060F300008C8000-000000067F00004005000060F300008E0F49__00000028C365FBE1-0000002D2A8E0B81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300002D8000-030000000000000000000000000000000002__0000008625CF2891-00000089F4693119":{"file_size":231907328,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C04000-000000067F00004005000060FB0000C08000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001808000-000000067F00004005000060FB000180C000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A30379-030000000000000000000000000000000002__000000AFE87558B0":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F700010D85CF-000000067F00004005000060F80100000000__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":164970496,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C70000-000000067F00004005000060FB0000C74000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001188000-000000067F00004005016EA00C000118C000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70000CB85B3-000000067F00004005000060F70000CC8B74__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30004A1D870-000000067F00004005000060F30004A2693B__000001398B56A519-0000013C9C0E3339":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00008CF772-000000067F00004005016EA00C00008E760F__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000D34000-000000067F00004005016EA00C0000D5D1E9__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C00014B79E7-000000067F00004005016EA00C00014CF88D__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F300040E319D-000000067F00004005000060F300040F41F4__00000122A7BB7B29-0000012694E36301":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002FF427D-000000067F00004005000060F30100000000__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":156073984,"generation":2,"shard":"0008"},"000000067F00004005000060F30005E0A466-000000067F00004005000060F30005E3B48F__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700005F9158-000000067F00004005000060F80100000000__00000057593D8169-0000005C01565329":{"file_size":230768640,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00018E4000-000000067F00004005016EA00C000193189A__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30005F0202C-000000067F00004005000060F30005F3303F__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000148000-000000067F00004005000060F1000014C000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300060C0000-000000067F00004005000060F300060C4000__0000016E41E03CA0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C9C000-000000067F00004005000060FB0000CC6E51__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050107B54700000A0EB1-000000067F000040050109CD330100000000__000001180B3FF408":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004EC000-000000067F00004005016EA00C00005A0000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C0000A9F465-000000067F00004005016EA00C0000ACF305__00000196C9018F59-0000019A2EAFE7A9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F30000208000-000000067F00004005000060F3000020C000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C000011E137-000000067F0000400500F67839000003E09B__000001048B25A8E9-0000010779A7F551":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30000402F4A-000000067F00004005000060F60100000000__000000114A805939-00000013FB921C81":{"file_size":166469632,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00004A8000-000000067F00004005016EA00C00004AC000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F70001968000-000000067F00004005000060F7000196C000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006EF8000-000000067F00004005000060F30006EFC000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000BB4000-000000067F00004005016EA00C0000C20000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700009C0000-000000067F00004005000060F80100000000__0000009A24DF6768":{"file_size":37371904,"generation":2,"shard":"0008"},"000000067F00004005000060F30004C84000-000000067F00004005000060F30004CB8000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002514000-000000067F00004005000060F30002530000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000DE05C8-000000067F00004005000060F80100000000__000000C824C09619-000000CC13D2E549":{"file_size":259473408,"generation":2,"shard":"0008"},"000000067F00004005000060F301FFFFFFFF-000000067F00004005000060F30300000000__00000186146441F1-0000018624969469":{"file_size":57344,"generation":6,"shard":"0008"},"000000067F00004005000060F30001886B2A-000000067F00004005000060F50100000000__00000075E5D2A930":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700006A8000-000000067F00004005000060F80100000000__000000636DE92159-000000663565F8C9":{"file_size":117022720,"generation":2,"shard":"0008"},"000000067F00004005000060FB000154C000-000000067F00004005000060FB0001558000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300053F40CC-000000067F00004005000060F30100000000__0000014EC58A4A79-0000015304A396B9":{"file_size":223453184,"generation":2,"shard":"0008"},"000000067F00004005000060F30005C95225-000000067F00004005000060F30005C9E3C4__0000016143292911-00000164DEE06671":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000558C000-000000067F00004005000060F30005598000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003FFA699-000000067F00004005000060F50100000000__00000122E1129DA0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30006F1C000-000000067F00004005000060F50100000000__000001848D082B20":{"file_size":24117248,"generation":2,"shard":"0008"},"000000067F00004005000060F3000486C000-000000067F00004005000060F30004878000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300019C2056-000000067F00004005000060F300019F31AA__00000079F2A2F311-0000007E3A9BFD29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC000004C000-000000067F0000400500EE16BC0000060000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000046EAB9-000000067F00004005000060F80100000000__000000417D21ACF9-00000044B4679349":{"file_size":48717824,"generation":2,"shard":"0008"},"000000067F000040050081DB430000790000-000000067F000040050081DB430000794000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D79000002C000-000000067F0000400500D69D790000078000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F60000026C90-000000067F00004005000060F60100000000__000000698F2C3A38":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30000738000-000000067F00004005000060F3000073C000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000204000-000000067F00004005000060F10000218000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500C782E40000177E20-000000067F0000400500C782E400001AFD31__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000048C000-000000067F00004005000060F700004B1E77__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300015F8000-000000067F00004005000060F50100000000__000000698F2C3A38":{"file_size":131276800,"generation":2,"shard":"0008"},"000000067F00004005000060F30000428000-000000067F00004005000060F3000042C000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB43000038C000-000000067F000040050081DB430000390000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB000102A1CE-000000067F00004005000060FB000103AD12__0000004C49155071-0000004F31878919":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001848000-000000067F00004005000060FB000184C000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00001DC000-000000067F00004005000060FB0000228000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00011D4000-000000067F00004005016EA00C0001228000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005016EA00C000011775B-030000000000000000000000000000000002__0000018820A34650":{"file_size":139264,"generation":11,"shard":"0008"},"000000067F00004005000060F700011B8000-000000067F00004005000060F80100000000__000001048B25A8E9-0000010779A7F551":{"file_size":263897088,"generation":2,"shard":"0008"},"000000067F00004005000060F3000660D31F-000000067F00004005000060F3000664E3CA__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC0000064000-000000067F0000400500EE16BC00000F28ED__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000525C065-000000067F00004005000060F50100000000__0000014EDD256548":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30004A7F98F-000000067F00004005000060F30100000000__000001398B56A519-0000013C9C0E3339":{"file_size":47595520,"generation":2,"shard":"0008"},"000000067F000040050100D04D000004369C-000000067F000040050100D04D000004B5AD__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F6000001A6E2-000000067F00004005000060F60100000000__00000047F1F2B800":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F700004405CF-000000067F00004005000060F80100000000__0000003D03FCCDB9-000000417D21ACF9":{"file_size":198836224,"generation":2,"shard":"0008"},"000000067F00004005000060F30002D28000-000000067F00004005000060F30002D2C000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F56D510100000000-000000067F0000400500F67839000003C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000E387D6-000000067F00004005000060F80100000000__000000D037B2DBD0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F3000213C000-000000067F00004005000060F30002168000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300060D4415-000000067F00004005000060F3000612D506__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FB3D3100000546CB-000000067F0000400500FB3D320100000000__00000122E1129DA0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F000040050081DB430000D18CA9-030000000000000000000000000000000002__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":210288640,"generation":2,"shard":"0008"},"000000067F00004005000060F60000062E4F-000000067F00004005000060F60100000000__00000104BD37F348":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C000016A065-000000067F0000400500F3A25C000017C0CB__0000010779A7F551-0000010A5E65DF39":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001AD0000-000000067F00004005000060FB0001B28B44__0000008196C976A1-0000008625CF2891":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000254000-000000067F00004005000060F30000298000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000E8C000-000000067F000040050081DB430000EA0000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300040F41F4-000000067F00004005000060F3000412D27C__00000122A7BB7B29-0000012694E36301":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00013B8000-000000067F00004005000060FB00013BC000__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700000D8000-000000067F00004005000060F700000DC000__0000000D80565628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000958000-000000067F00004005000060F700009605D8__000000923719A971-00000096262826C9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB00004A0000-000000067F00004005000060FB00004A4000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700001213F2-000000067F00004005000060F80100000000__0000000D55A212C9-000000114A805939":{"file_size":55320576,"generation":2,"shard":"0008"},"000000067F00004005000060F30004156457-000000067F00004005000060F30100000000__00000122A7BB7B29-0000012694E36301":{"file_size":96927744,"generation":2,"shard":"0008"},"000000067F00004005000060F30003278000-000000067F00004005000060F3000327C000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000158F667-000000067F00004005016EA00C00015B74FF__000001AC25760149-000001AFC313C819":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001D50000-000000067F00004005000060FB0001D88B43__0000008DBE2855F9-000000923719A971":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F60000054AE8-000000067F00004005000060F60100000000__000000DBD29DC248":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300002C4887-000000067F00004005000060F60100000000__0000000D80565628":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70001B34000-000000067F00004005000060F70001B5A072__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F600000416A8-000000067F00004005000060F60100000000__000000AFE87558B0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F10000050000-000000067F00004005000060F10000058000__000000044854EBD1-00000008B6B51879":{"file_size":264011776,"generation":2,"shard":"0008"},"000000067F00004005000060F300043FC000-000000067F00004005000060F300044D3639__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30004878000-000000067F00004005000060F3000487C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000396C000-000000067F00004005000060F30003998000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00019F7907-000000067F00004005016EA00C0001A477A4__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268443648,"generation":11,"shard":"0008"},"000000067F00004005016EA00C00014D7727-000000067F00004005016EA00C00014E75C6__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005016EA00C00016570D9-030000000000000000000000000000000002__000001AC25760149-000001AFC313C819":{"file_size":86335488,"generation":11,"shard":"0008"},"000000067F00004005000060F70001270000-000000067F00004005000060F80100000000__0000010FB1BE19B9-00000113456156F1":{"file_size":265363456,"generation":2,"shard":"0008"},"000000067F0000400500EB4A4800003BFD31-000000067F0000400500EB4A4800003C7C42__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300014B31F8-000000067F00004005000060F300014CC16D__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000D5D1E9-030000000000000000000000000000000002__0000019E7001E460":{"file_size":139264,"generation":11,"shard":"0008"},"000000067F00004005000060F100003B8214-000000067F00004005000060F100003C0432__0000003D03FCCDB9-000000417D21ACF9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001346854-000000067F00004005016EA00C000135FCAD__000001A931C135B1-000001AC25760149":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F3000160410C-000000067F00004005000060F3000165515A__000000698AF6E809-0000006DDB29D589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000118B12B-030000000000000000000000000000000002__00000054161C34B8":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30006DF0000-000000067F00004005000060F30006DF4000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700003C4000-000000067F00004005000060F700003FE341__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000FF0000-000000067F00004005000060F30100000000__0000004C49155071-0000004F31878919":{"file_size":256286720,"generation":2,"shard":"0008"},"000000067F00004005000060FB00015F4000-000000067F00004005000060FB00015FCD31__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005816253-000000067F00004005000060F30005847319__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002460000-000000067F00004005000060F30002464000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000113A337-000000067F00004005000060F700011528FB__000000FF8B261599-000001048B25A8E9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB000037968A-030000000000000000000000000000000002__0000000D55A212C9-000000114A805939":{"file_size":226426880,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000128000-000000067F00004005016EA00C000012FE9A__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F0000400500EB4A48000036FF11-000000067F0000400500EB4A4800003A7E20__000000FCCD5238B1-000000FF8B261599":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000658113F-000000067F00004005000060F3000659A203__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D18000-000000067F00004005016EA00C0001D1C000__000001BCB572A4E0":{"file_size":134422528,"generation":17,"shard":"0008"},"000000067F00004005000060F30001A44000-000000067F00004005000060F30001AB1583__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F10000138000-000000067F00004005000060F1000013C000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300009BC000-000000067F00004005000060F30000A50000__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F7000110E30C-000000067F00004005000060F80100000000__000000F91FE84F08":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F50100000000-000000067F00004005000060F60000014000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006F18000-000000067F00004005000060F30006FA900D__00000184624E5741-000001860C80A151":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001D88B43-000000067F00004005000060FB0100000000__0000008DBE2855F9-000000923719A971":{"file_size":249028608,"generation":2,"shard":"0008"},"000000067F00004005000060F3000122A1D5-000000067F00004005000060F30100000000__0000005413AB3641-00000057593D8169":{"file_size":48783360,"generation":2,"shard":"0008"},"000000067F00004005000060F30006277C61-000000067F00004005000060F30006320C60__0000016E1FBB7B99-000001715E483C79":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000388000-000000067F000040050081DB43000038C000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000E67A6E-000000067F00004005016EA00C0000E77906__0000019E2C5DCEE1-000001A1DD8B4481":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F300009B8000-000000067F00004005000060F300009BC000__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400501025D900000068000-000000067F00004005010450640000000570__0000010FB1BE19B9-00000113456156F1":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB00002D4B6A-030000000000000000000000000000000002__0000000D80565628":{"file_size":147456,"generation":2,"shard":"0008"},"000000067F00004005000060F30001E50FF3-000000067F00004005000060F30001E720A2__000000923719A971-00000096262826C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00005A4000-000000067F00004005016EA00C0000670000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060FB0000C18000-000000067F00004005000060FB0000C1C000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000BA4F5B-000000067F00004005000060F70000BBD532__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70001AC115C-000000067F00004005000060F80100000000__0000015304A396B9-0000015670D6AFD9":{"file_size":237248512,"generation":2,"shard":"0008"},"000000067F00004005000060F30004D24000-000000067F00004005000060F30004DA8000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30006CA4000-000000067F00004005000060F30006D10000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500EE16BC00001433D0-030000000000000000000000000000000002__000000FCCD5238B1-000000FF8B261599":{"file_size":146407424,"generation":2,"shard":"0008"},"000000067F00004005000060F3000165515A-000000067F00004005000060F30100000000__000000698AF6E809-0000006DDB29D589":{"file_size":112680960,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000118C000-000000067F00004005016EA00C00011D0000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F000040050081DB43000094A076-030000000000000000000000000000000002__000000A9EB8C4489-000000ACA44C8E99":{"file_size":176054272,"generation":2,"shard":"0008"},"000000067F00004005000060F70001528000-000000067F00004005000060F7000152C000__0000012E77D3BF00":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C82B50-000000067F000040050081DB430000CC4BC2__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001EF15A-000000067F000040050081DB4300002791D8__0000008196C976A1-0000008625CF2891":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F10000125BF2-000000067F00004005000060F20100000000__000000114A805939-00000013FB921C81":{"file_size":78782464,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E40F86-000000067F00004005000060F30100000000__000000417D21ACF9-00000044B4679349":{"file_size":111108096,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000FF0000-000000067F00004005016EA00C0000FF4000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30000CB16B6-000000067F00004005000060F50100000000__0000003D2AB09B68":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70001990000-000000067F00004005000060F70001994000__0000014EDD256548":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000A54000-000000067F00004005000060F30000A5F9BB__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300061B8705-000000067F00004005000060F300061D9774__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F7000084C000-000000067F00004005000060F70000858000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000848000-000000067F00004005000060F7000084C000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001D18000-000000067F00004005000060F30001D79136__0000008DBE2855F9-000000923719A971":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001558000-000000067F00004005000060FB000155C000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300024440AE-000000067F00004005000060F3000244D189__000000A9EB8C4489-000000ACA44C8E99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002CFC020-000000067F00004005000060F30100000000__000000C824C09619-000000CC13D2E549":{"file_size":150708224,"generation":2,"shard":"0008"},"000000067F000040050081DB430000A4A074-000000067F000040050081DB430000A640EA__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000C98000-000000067F00004005000060FB0000C9C000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001840000-000000067F00004005000060FB0001844000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30000802123-000000067F00004005000060F30000853115__00000023FEF9F321-00000028C365FBE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000029ED0-000000067F00004005000060F80100000000__000000027AF9D7D0":{"file_size":24576,"generation":1,"shard":"0008"},"000000067F00004005016EA00C00003E4000-000000067F00004005016EA00C00003E8000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30004CBC000-000000067F00004005000060F30004D20000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000122C000-000000067F00004005016EA00C0001240000__000001A95031E5B8":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30004DF086C-000000067F00004005000060F50100000000__000001444EB7FC10":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F300050B5199-000000067F00004005000060F30100000000__0000014784964B91-0000014B000D1821":{"file_size":126124032,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001A477A4-000000067F00004005016EA00C0001ADF63C__000001B3E1B95181-000001B6FFE46BC9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F70001828000-000000067F00004005000060F7000182C000__000001444EB7FC10":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F100004F0000-000000067F00004005000060F10000518222__0000005413AB3641-00000057593D8169":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060F30005EFD576-000000067F00004005000060F30100000000__00000164DEE06671-0000016834A3FC91":{"file_size":193077248,"generation":2,"shard":"0008"},"000000067F0000400500F8E3A50100000000-000000067F0000400500FA2AD30000004000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000258E3A9-000000067F00004005000060F3000259F4A3__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000C90000-000000067F00004005000060F70000CB85B3__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005000060FB000114C000-000000067F00004005000060FB000118B12B__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003644000-000000067F00004005000060F30003648000__000000F91FE84F08":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001A50000-000000067F00004005000060FB0001A60B43__0000007E3A9BFD29-0000008196C976A1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003C257AD-000000067F00004005000060F50100000000__000001180B3FF408":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002DE8000-000000067F00004005000060F30002E4104A__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C00000C8000-000000067F0000400500F3A25C00000EA069__000001048B25A8E9-0000010779A7F551":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002174000-000000067F00004005000060F30002210000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300014D5280-000000067F00004005000060F300014E6333__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000332B1B6-000000067F00004005000060F30003344134__000000E4C63CFA21-000000E7C2F1B249":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300065F42B4-000000067F00004005000060F3000660D31F__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010E264A-000000067F000040050081DB4300010F46BD__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300069D13FA-000000067F00004005000060F300069FA3F6__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300061D9774-000000067F00004005000060F30006222843__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100005C821A-000000067F00004005000060F20100000000__000000601F43CF09-000000636DE92159":{"file_size":265183232,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000200000-000000067F0000400500EB4A480000204000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70001690000-000000067F00004005000060F70100000000__000001334140FC21-00000137115BE4D9":{"file_size":273965056,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000A575C7-000000067F00004005016EA00C0000A9F465__00000196C9018F59-0000019A2EAFE7A9":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001E6C000-000000067F00004005000060FB0001E98000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00014195A7-000000067F00004005000060FB000147A0EC__000000601F43CF09-000000636DE92159":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000AE168A-030000000000000000000000000000000002__0000003203FB5749-0000003579F03331":{"file_size":223379456,"generation":2,"shard":"0008"},"000000067F00004005000060F30000CA0000-000000067F00004005000060F30000CA4000__0000003D2AB09B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300006E4000-000000067F00004005000060F30000738000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300006E0000-000000067F00004005000060F300006E4000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001124000-000000067F00004005000060FB0001148000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500D69D7900000A8000-000000067F0000400500D69D7900000AC000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500C782E40000130000-000000067F0000400500C782E40000137F10__000000D01F399709-000000D31E48D7C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000020FBCF-000000067F00004005016EA00C0000257A6F__000001880F984A29-0000018C496B6DB1":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060FB0001B28B44-000000067F00004005000060FB0100000000__0000008196C976A1-0000008625CF2891":{"file_size":249454592,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001120000-000000067F00004005000060FB0001124000__00000054161C34B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005474062-000000067F00004005000060F3000549D0A6__0000015304A396B9-0000015670D6AFD9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500C782E4000023FA62-030000000000000000000000000000000002__000000D01F399709-000000D31E48D7C9":{"file_size":245366784,"generation":2,"shard":"0008"},"000000067F000040050081DB430000160484-030000000000000000000000000000000002__00000079F2A2F311-0000007E3A9BFD29":{"file_size":226582528,"generation":2,"shard":"0008"},"000000067F00004005000060F300038A4FB4-000000067F00004005000060F300038B5F5B__0000010779A7F551-0000010A5E65DF39":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300017E8000-000000067F00004005000060F300017EC000__00000075E5D2A930":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FB3D300100000000-000000067F0000400500FB3D31000000C000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700010105DB-000000067F00004005000060F80100000000__000000E4C63CFA21-000000E7C2F1B249":{"file_size":254935040,"generation":2,"shard":"0008"},"000000067F00004005000060F70000858570-000000067F00004005000060F80100000000__0000008196C976A1-0000008625CF2891":{"file_size":252985344,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001D4000-000000067F000040050081DB4300001E8000__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00005E0000-000000067F00004005000060FB0000638B45__0000001B59EEB909-0000001FFBC01501":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050107B547000006C000-000000067F000040050107B54700000A0EB1__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000430000-000000067F00004005000060FB0000434000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300014E6333-000000067F00004005000060F3000151F271__000000636DE92159-000000663565F8C9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500FB3D300100000000-000000067F0000400500FB3D300300000000__00000117EDA82C11-0000011B632CC319":{"file_size":65536,"generation":2,"shard":"0008"},"000000067F00004005000060F30004BE7584-000000067F00004005000060F30100000000__0000013C9C0E3339-0000013FEFA7D709":{"file_size":58204160,"generation":2,"shard":"0008"},"000000067F00004005000060F70001068000-000000067F00004005000060F80100000000__000000E7C2F1B249-000000EBC9213D59":{"file_size":168730624,"generation":2,"shard":"0008"},"000000067F00004005000060F1000013C000-000000067F00004005000060F10000148000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000659A203-000000067F00004005000060F300065BB235__000001715E483C79-000001751A7D7589":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F70000EC0000-000000067F00004005000060F70000EF85D6__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":268460032,"generation":2,"shard":"0008"},"000000067F00004005010660F500000B4000-000000067F00004005010660F500000F44CB__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300067A4000-000000067F00004005000060F300067F0000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500DBCED500000F0000-000000067F0000400500DBCED500000F4000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000768000-000000067F000040050081DB43000076C000__000000A5A3F27398":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C00018E0000-000000067F00004005016EA00C00018E4000__000001B3F17FE4E0":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30000A50000-000000067F00004005000060F30000A54000__000000321AA80270":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001E68000-000000067F00004005000060FB0001E6C000__0000009A24DF6768":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001960000-000000067F00004005000060F300019790A2__00000079F2A2F311-0000007E3A9BFD29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000B6A1D0-000000067F00004005000060FB0000BAAD15__0000003579F03331-0000003959DA2DE9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002E4A157-000000067F00004005000060F30002E630CF__000000D31E48D7C9-000000D74E29AAD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006E70000-000000067F00004005000060F30006E74000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F700004464DD-000000067F00004005000060F7000046EAB9__000000417D21ACF9-00000044B4679349":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000204000-000000067F0000400500EB4A480000218000__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300042D51D6-000000067F00004005000060F3000430E1E9__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000F30000-000000067F00004005000060FB0100000000__00000047E31D98D1-0000004C49155071":{"file_size":272302080,"generation":2,"shard":"0008"},"000000067F000040050081DB4300006F8000-030000000000000000000000000000000002__0000009DF02C1241-000000A173C00489":{"file_size":235110400,"generation":2,"shard":"0008"},"000000067F000040050081DB4300001EC000-000000067F000040050081DB4300001F1DA6__00000081AA3C40F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300038A3082-000000067F00004005000060F30100000000__000001048B25A8E9-0000010779A7F551":{"file_size":76644352,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000400000-000000067F00004005016EA00C0000404000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30003481DDB-000000067F00004005000060F30100000000__000000E7C2F1B249-000000EBC9213D59":{"file_size":107814912,"generation":2,"shard":"0008"},"000000067F00004005000060F3000489C000-000000067F00004005000060F300048A0000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000CD6C36-000000067F000040050081DB430000D18CA9__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30004888000-000000067F00004005000060F3000488C000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300008E0F49-000000067F00004005000060F30000921E8A__00000028C365FBE1-0000002D2A8E0B81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500C782E40000074000-000000067F0000400500C782E400000A0000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00011F2D11-000000067F00004005000060FB0001203856__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300046330B1-000000067F00004005000060F300046B41AA__0000012E71CF31F9-000001334140FC21":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003548000-000000067F00004005000060F30003580FD3__000000EFA7EAA9E1-000000F309FCDD19":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0001198B44-000000067F00004005000060FB00011C1688__0000005413AB3641-00000057593D8169":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000049C000-000000067F00004005000060F300004A8000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000B44000-000000067F00004005016EA00C0000BB0000__0000019E7001E460":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F700014F0000-000000067F00004005000060F700014F85DF__0000012694E36301-0000012A3F140591":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000C5E15B-000000067F000040050081DB430000C801D1__000000B768469051-000000BAB1E56C91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A10000-000000067F00004005000060F30003A21037__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006EFC000-000000067F00004005000060F30006F18000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0001D1F87B-000000067F00004005016EA00C0001D7F71A__000001BA93C39481-000001BCB572A4E1":{"file_size":268451840,"generation":17,"shard":"0008"},"000000067F00004005000060F30002A34000-000000067F00004005000060F30002A40000__000000C483D0D6B8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000F0AA88-000000067F00004005000060F80100000000__000000DBD29DC248":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30006700000-000000067F00004005000060F30006704000__00000178C5D5D3A8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CC4000-000000067F00004005000060F30001CD0000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000858000-000000067F00004005000060F80100000000__00000081AA3C40F0":{"file_size":48439296,"generation":2,"shard":"0008"},"000000067F000040050081DB4300000D6407-000000067F000040050081DB430000160484__00000079F2A2F311-0000007E3A9BFD29":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300057DD292-000000067F00004005000060F30005816253__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006222843-000000067F00004005000060F3000625B8F0__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000410000-000000067F00004005000060FB0000430B46__000000114A805939-00000013FB921C81":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F100006A8000-000000067F00004005000060F100006B0000__0000006DDB29D589-000000722F474369":{"file_size":264110080,"generation":2,"shard":"0008"},"000000067F00004005000060F3000460202F-000000067F00004005000060F300046330B1__0000012E71CF31F9-000001334140FC21":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30006E74000-000000067F00004005000060F30006EF8000__000001848D082B20":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003A3B020-000000067F00004005000060F30003A4C09C__0000010D5DC42EF9-0000010FB1BE19B9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30002535462-000000067F00004005000060F3000258E3A9__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500EB4A480000294000-000000067F0000400500EB4A480000355928__000000FCD84FE628":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016E85370000000000-030000000000000000000000000000000002__00000159A7EC8CB9-0000015DD1D3C809":{"file_size":152190976,"generation":2,"shard":"0008"},"000000067F00004005000060F3000158C000-000000067F00004005000060F300015B0000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003386D10-000000067F00004005000060F300033D7D7C__000000E7C2F1B249-000000EBC9213D59":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000E7C000-000000067F00004005000060F30000EF1FC3__00000047F1F2B800":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500FA2AD30000030000-000000067F0000400500FA2AD30000034000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005588000-000000067F00004005000060F3000558C000__00000159B010F6C0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300039A0000-000000067F00004005000060F300039A4000__0000010D77B487A0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F6000008A13D-000000067F00004005000060F60100000000__000001444EB7FC10":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060FB00017120CE-000000067F00004005000060FB000172AC12__0000006DDB29D589-000000722F474369":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30003200000-000000067F00004005000060F30003204000__000000E4D847F4E0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300007C1007-000000067F00004005000060F30000802123__00000023FEF9F321-00000028C365FBE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F3A25C000006C000-000000067F0000400500F3A25C00000BB439__00000104BD37F348":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300015B4000-000000067F00004005000060F300015F8000__000000698F2C3A38":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F300060C220F-000000067F00004005000060F300060CB2C8__0000016B49A934C1-0000016E1FBB7B99":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500F8E3A5000004A25C-000000067F0000400500F8E3A50100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002C9AFB8-000000067F00004005000060F30002CFC020__000000C824C09619-000000CC13D2E549":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005010F2BD40100000000-000000067F00004005010F44EB000000C000__00000126C3C69FC0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30002AEED02-000000067F00004005000060F50100000000__000000C483D0D6B8":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30002EB8000-000000067F00004005000060F30002F5105E__000000D74E29AAD1-000000DBBFA87AE1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500E3A2A1000016321A-030000000000000000000000000000000002__000000EFDE07FFD8":{"file_size":139264,"generation":2,"shard":"0008"},"000000067F00004005000060F3000135C000-000000067F00004005000060F30001407F7A__000000603CA8F2F0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F0000400500F67839000006AEF4-000000067F0000400500F7D2DD0100000000__0000010D77B487A0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F30005DA03A8-000000067F00004005000060F30005DC93F1__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB4300010E2072-000000067F000040050081DB430100000000__000000D01F399709-000000D31E48D7C9":{"file_size":15392768,"generation":2,"shard":"0008"},"000000067F00004005000060F300004A8000-000000067F00004005000060F300004AC000__000000174479FC18":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060FB00016E0A44-000000067F00004005000060FB0001701588__0000006DDB29D589-000000722F474369":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F300024D8000-000000067F00004005000060F300024DC000__000000AFE87558B0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003BC8000-000000067F00004005000060F30003BCC000__000001180B3FF408":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F00100000000-000000067F00004005000060F10000004000__000000027AF9D7D0":{"file_size":134422528,"generation":1,"shard":"0008"},"000000067F000040050081DB430100000000-000000067F0000400500C782E40000074000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30003D14206-000000067F00004005000060F30003D252C8__00000117EDA82C11-0000011B632CC319":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700006479E7-000000067F00004005000060F80100000000__000000603CA8F2F0":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000B9C988-000000067F00004005000060F70000BA4F5B__000000AFD23C27B9-000000B2B5C4E8F9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F0000400500D69D790000078000-000000067F0000400500D69D79000007C000__000000EFDE07FFD8":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F70000CC8B74-000000067F00004005000060F80100000000__000000BAB1E56C91-000000BD9A7C56D9":{"file_size":95657984,"generation":2,"shard":"0008"},"000000067F00004005000060FB0000708000-000000067F00004005000060FB000070C000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F000040050081DB430000EA0000-000000067F000040050081DB430000EEA075__000000C462B3C2A9-000000C824C09619":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005016EA00C000001FD3E-000000067F00004005016EA00C0000097BDA__0000018624969469-000001880F984A29":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F00004005000060F3000689E295-000000067F00004005000060F3000690F2FD__00000178B8B10551-0000017C9F5597E1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30000CE0000-000000067F00004005000060F30000D31030__0000003D03FCCDB9-000000417D21ACF9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F000040050081DB430000EA0000-030000000000000000000000000000000002__000000C483D0D6B8":{"file_size":20307968,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000807A34-000000067F00004005016EA00C00008578D4__000001935283F9B9-00000196C9018F59":{"file_size":268451840,"generation":11,"shard":"0008"},"000000067F000040050081DB430001060000-000000067F000040050081DB430001064000__000000D037B2DBD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F3000480F32C-000000067F00004005000060F3000486837F__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F700009385D4-000000067F00004005000060F80100000000__0000008DBE2855F9-000000923719A971":{"file_size":252207104,"generation":2,"shard":"0008"},"000000067F00004005000060F30000090000-000000067F00004005000060F300000C1095__000000021DC73119-000000044854EBD1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000480620C-000000067F00004005000060F3000480F32C__000001334140FC21-00000137115BE4D9":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005FA40AD-000000067F00004005000060F30005FC519A__0000016834A3FC91-0000016B49A934C1":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060FB00014A42B8-030000000000000000000000000000000002__000000601F43CF09-000000636DE92159":{"file_size":137322496,"generation":2,"shard":"0008"},"000000067F00004005000060F30001CD0000-000000067F00004005000060F30001CD4000__0000008DDCD70B68":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005016EA00C0000404000-000000067F00004005016EA00C0000428000__000001936E73D028":{"file_size":134422528,"generation":11,"shard":"0008"},"000000067F00004005000060F30002079FDE-000000067F00004005000060F300020830BE__0000009DF02C1241-000000A173C00489":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F3000487C000-000000067F00004005000060F30004880000__00000139CF156B58":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005010A188401FFFFFFFF-000000067F00004005010A18840300000000__00000137115BE4D9-000001398B56A519":{"file_size":24576,"generation":2,"shard":"0008"},"000000067F00004005000060F70000218000-000000067F00004005000060F7000021C000__0000002427BD8BD0":{"file_size":134422528,"generation":2,"shard":"0008"},"000000067F00004005000060F30005EF454F-000000067F00004005000060F30005EFD576__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"},"000000067F00004005000060F30005DC93F1-000000067F00004005000060F30005E0A466__00000164DEE06671-0000016834A3FC91":{"file_size":268451840,"generation":2,"shard":"0008"}},"disk_consistent_lsn":"1BC/B5734CD8","metadata_bytes":{"disk_consistent_lsn":"1BC/B5734CD8","prev_record_lsn":"1BC/B5734CB0","ancestor_timeline":null,"ancestor_lsn":"0/0","latest_gc_cutoff_lsn":"1BC/B5732690","initdb_lsn":"0/14EE150","pg_version":16},"lineage":{}} diff --git a/test_runner/fixtures/metrics.py b/test_runner/fixtures/metrics.py index 509f41366b..cda70be8da 100644 --- a/test_runner/fixtures/metrics.py +++ b/test_runner/fixtures/metrics.py @@ -150,6 +150,7 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = ( "pageserver_pitr_history_size", "pageserver_layer_bytes", "pageserver_layer_count", + "pageserver_visible_physical_size", "pageserver_storage_operations_seconds_count_total", "pageserver_storage_operations_seconds_sum_total", "pageserver_evictions_total", From 7a2625b8033bd869f0ab4f8c020dfab3eade56b2 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Thu, 1 Aug 2024 09:52:34 -0400 Subject: [PATCH 28/37] storage-scrubber: log version on start (#8571) Helps us better identify which version of storage scrubber is running. --------- Signed-off-by: Alex Chi Z --- Cargo.lock | 1 + storage_scrubber/Cargo.toml | 1 + storage_scrubber/src/main.rs | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index dc4f0c7b81..2677699702 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5758,6 +5758,7 @@ dependencies = [ "either", "futures", "futures-util", + "git-version", "hex", "humantime", "itertools 0.10.5", diff --git a/storage_scrubber/Cargo.toml b/storage_scrubber/Cargo.toml index 7d5b7d10b9..d19119990b 100644 --- a/storage_scrubber/Cargo.toml +++ b/storage_scrubber/Cargo.toml @@ -10,6 +10,7 @@ aws-smithy-async.workspace = true either.workspace = true tokio-rustls.workspace = true anyhow.workspace = true +git-version.workspace = true hex.workspace = true humantime.workspace = true thiserror.workspace = true diff --git a/storage_scrubber/src/main.rs b/storage_scrubber/src/main.rs index 4c804c00c1..a111c31844 100644 --- a/storage_scrubber/src/main.rs +++ b/storage_scrubber/src/main.rs @@ -17,6 +17,11 @@ use storage_scrubber::{ use clap::{Parser, Subcommand}; use utils::id::TenantId; +use utils::{project_build_tag, project_git_version}; + +project_git_version!(GIT_VERSION); +project_build_tag!(BUILD_TAG); + #[derive(Parser)] #[command(author, version, about, long_about = None)] #[command(arg_required_else_help(true))] @@ -101,6 +106,8 @@ enum Command { async fn main() -> anyhow::Result<()> { let cli = Cli::parse(); + tracing::info!("version: {}, build_tag {}", GIT_VERSION, BUILD_TAG); + let bucket_config = BucketConfig::from_env()?; let command_log_name = match &cli.command { From a9bcabe503608169d34f85eb242a24ba423f1066 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Thu, 1 Aug 2024 10:00:06 -0400 Subject: [PATCH 29/37] fix(pageserver): skip existing layers for btm-gc-compaction (#8498) part of https://github.com/neondatabase/neon/issues/8002 Due to the limitation of the current layer map implementation, we cannot directly replace a layer. It's interpreted as an insert and a deletion, and there will be file exist error when renaming the newly-created layer to replace the old layer. We work around that by changing the end key of the image layer. A long-term fix would involve a refactor around the layer file naming. For delta layers, we simply skip layers with the same key range produced, though it is possible to add an extra key as an alternative solution. * The image layer range for the layers generated from gc-compaction will be Key::MIN..(Key..MAX-1), to avoid being recognized as an L0 delta layer. * Skip existing layers if it turns out that we need to generate a layer with the same persistent key in the same generation. Note that it is possible that the newly-generated layer has different content from the existing layer. For example, when the user drops a retain_lsn, the compaction could have combined or dropped some records, therefore creating a smaller layer than the existing one. We discard the "optimized" layer for now because we cannot deal with such rewrites within the same generation. --------- Signed-off-by: Alex Chi Z Co-authored-by: Christian Schwarz --- pageserver/src/tenant.rs | 47 ++- .../src/tenant/storage_layer/layer_desc.rs | 14 + pageserver/src/tenant/timeline/compaction.rs | 279 ++++++++++++++++-- .../src/tenant/timeline/layer_manager.rs | 14 +- 4 files changed, 320 insertions(+), 34 deletions(-) diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs index b9257dfbe8..84c5095610 100644 --- a/pageserver/src/tenant.rs +++ b/pageserver/src/tenant.rs @@ -6963,7 +6963,11 @@ mod tests { vec![ // Image layer at GC horizon PersistentLayerKey { - key_range: Key::MIN..Key::MAX, + key_range: { + let mut key = Key::MAX; + key.field6 -= 1; + Key::MIN..key + }, lsn_range: Lsn(0x30)..Lsn(0x31), is_delta: false }, @@ -6982,6 +6986,15 @@ mod tests { ] ); + // increase GC horizon and compact again + { + // Update GC info + let mut guard = tline.gc_info.write().unwrap(); + guard.cutoffs.time = Lsn(0x40); + guard.cutoffs.space = Lsn(0x40); + } + tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + Ok(()) } @@ -7333,6 +7346,15 @@ mod tests { ); } + // increase GC horizon and compact again + { + // Update GC info + let mut guard = tline.gc_info.write().unwrap(); + guard.cutoffs.time = Lsn(0x40); + guard.cutoffs.space = Lsn(0x40); + } + tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + Ok(()) } @@ -7837,6 +7859,10 @@ mod tests { ]; let verify_result = || async { + let gc_horizon = { + let gc_info = tline.gc_info.read().unwrap(); + gc_info.cutoffs.time + }; for idx in 0..10 { assert_eq!( tline @@ -7847,7 +7873,7 @@ mod tests { ); assert_eq!( tline - .get(get_key(idx as u32), Lsn(0x30), &ctx) + .get(get_key(idx as u32), gc_horizon, &ctx) .await .unwrap(), &expected_result_at_gc_horizon[idx] @@ -7873,7 +7899,24 @@ mod tests { let cancel = CancellationToken::new(); tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + verify_result().await; + // compact again + tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + verify_result().await; + + // increase GC horizon and compact again + { + // Update GC info + let mut guard = tline.gc_info.write().unwrap(); + guard.cutoffs.time = Lsn(0x38); + guard.cutoffs.space = Lsn(0x38); + } + tline.compact_with_gc(&cancel, &ctx).await.unwrap(); + verify_result().await; // no wals between 0x30 and 0x38, so we should obtain the same result + + // not increasing the GC horizon and compact again + tline.compact_with_gc(&cancel, &ctx).await.unwrap(); verify_result().await; Ok(()) diff --git a/pageserver/src/tenant/storage_layer/layer_desc.rs b/pageserver/src/tenant/storage_layer/layer_desc.rs index bd765560e4..cbd18e650f 100644 --- a/pageserver/src/tenant/storage_layer/layer_desc.rs +++ b/pageserver/src/tenant/storage_layer/layer_desc.rs @@ -41,6 +41,20 @@ pub struct PersistentLayerKey { pub is_delta: bool, } +impl std::fmt::Display for PersistentLayerKey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}..{} {}..{} is_delta={}", + self.key_range.start, + self.key_range.end, + self.lsn_range.start, + self.lsn_range.end, + self.is_delta + ) + } +} + impl PersistentLayerDesc { pub fn key(&self) -> PersistentLayerKey { PersistentLayerKey { diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 4fe9bbafab..61d662d25d 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -4,7 +4,7 @@ //! //! The old legacy algorithm is implemented directly in `timeline.rs`. -use std::collections::BinaryHeap; +use std::collections::{BinaryHeap, HashSet}; use std::ops::{Deref, Range}; use std::sync::Arc; @@ -30,7 +30,9 @@ use crate::page_cache; use crate::tenant::config::defaults::{DEFAULT_CHECKPOINT_DISTANCE, DEFAULT_COMPACTION_THRESHOLD}; use crate::tenant::remote_timeline_client::WaitCompletionError; use crate::tenant::storage_layer::merge_iterator::MergeIterator; -use crate::tenant::storage_layer::{AsLayerDesc, PersistentLayerDesc, ValueReconstructState}; +use crate::tenant::storage_layer::{ + AsLayerDesc, PersistentLayerDesc, PersistentLayerKey, ValueReconstructState, +}; use crate::tenant::timeline::ImageLayerCreationOutcome; use crate::tenant::timeline::{drop_rlock, DeltaLayerWriter, ImageLayerWriter}; use crate::tenant::timeline::{Layer, ResidentLayer}; @@ -1368,7 +1370,7 @@ impl Timeline { pub(crate) async fn generate_key_retention( self: &Arc, key: Key, - history: &[(Key, Lsn, Value)], + full_history: &[(Key, Lsn, Value)], horizon: Lsn, retain_lsn_below_horizon: &[Lsn], delta_threshold_cnt: usize, @@ -1376,14 +1378,14 @@ impl Timeline { ) -> anyhow::Result { // Pre-checks for the invariants if cfg!(debug_assertions) { - for (log_key, _, _) in history { + for (log_key, _, _) in full_history { assert_eq!(log_key, &key, "mismatched key"); } - for i in 1..history.len() { - assert!(history[i - 1].1 <= history[i].1, "unordered LSN"); - if history[i - 1].1 == history[i].1 { + for i in 1..full_history.len() { + assert!(full_history[i - 1].1 <= full_history[i].1, "unordered LSN"); + if full_history[i - 1].1 == full_history[i].1 { assert!( - matches!(history[i - 1].2, Value::Image(_)), + matches!(full_history[i - 1].2, Value::Image(_)), "unordered delta/image, or duplicated delta" ); } @@ -1414,7 +1416,7 @@ impl Timeline { } lsn_split_points.push(horizon); let mut current_idx = 0; - for item @ (_, lsn, _) in history { + for item @ (_, lsn, _) in full_history { while current_idx < lsn_split_points.len() && *lsn > lsn_split_points[current_idx] { current_idx += 1; } @@ -1459,6 +1461,68 @@ impl Timeline { if let Some((key, lsn, img)) = base_img_from_ancestor { replay_history.push((key, lsn, Value::Image(img))); } + + /// Generate debug information for the replay history + fn generate_history_trace(replay_history: &[(Key, Lsn, Value)]) -> String { + use std::fmt::Write; + let mut output = String::new(); + if let Some((key, _, _)) = replay_history.first() { + write!(output, "key={} ", key).unwrap(); + let mut cnt = 0; + for (_, lsn, val) in replay_history { + if val.is_image() { + write!(output, "i@{} ", lsn).unwrap(); + } else if val.will_init() { + write!(output, "di@{} ", lsn).unwrap(); + } else { + write!(output, "d@{} ", lsn).unwrap(); + } + cnt += 1; + if cnt >= 128 { + write!(output, "... and more").unwrap(); + break; + } + } + } else { + write!(output, "").unwrap(); + } + output + } + + fn generate_debug_trace( + replay_history: Option<&[(Key, Lsn, Value)]>, + full_history: &[(Key, Lsn, Value)], + lsns: &[Lsn], + horizon: Lsn, + ) -> String { + use std::fmt::Write; + let mut output = String::new(); + if let Some(replay_history) = replay_history { + writeln!( + output, + "replay_history: {}", + generate_history_trace(replay_history) + ) + .unwrap(); + } else { + writeln!(output, "replay_history: ",).unwrap(); + } + writeln!( + output, + "full_history: {}", + generate_history_trace(full_history) + ) + .unwrap(); + writeln!( + output, + "when processing: [{}] horizon={}", + lsns.iter().map(|l| format!("{l}")).join(","), + horizon + ) + .unwrap(); + output + } + for (i, split_for_lsn) in split_history.into_iter().enumerate() { // TODO: there could be image keys inside the splits, and we can compute records_since_last_image accordingly. records_since_last_image += split_for_lsn.len(); @@ -1483,10 +1547,27 @@ impl Timeline { } } if let Some((_, _, val)) = replay_history.first() { - assert!(val.will_init(), "invalid history, no base image"); + if !val.will_init() { + return Err(anyhow::anyhow!("invalid history, no base image")).with_context( + || { + generate_debug_trace( + Some(&replay_history), + full_history, + retain_lsn_below_horizon, + horizon, + ) + }, + ); + } } if generate_image && records_since_last_image > 0 { records_since_last_image = 0; + let replay_history_for_debug = if cfg!(debug_assertions) { + Some(replay_history.clone()) + } else { + None + }; + let replay_history_for_debug_ref = replay_history_for_debug.as_deref(); let history = std::mem::take(&mut replay_history); let mut img = None; let mut records = Vec::with_capacity(history.len()); @@ -1494,14 +1575,30 @@ impl Timeline { img = Some((*lsn, val.clone())); for (_, lsn, val) in history.into_iter().skip(1) { let Value::WalRecord(rec) = val else { - panic!("invalid record") + return Err(anyhow::anyhow!( + "invalid record, first record is image, expect walrecords" + )) + .with_context(|| { + generate_debug_trace( + replay_history_for_debug_ref, + full_history, + retain_lsn_below_horizon, + horizon, + ) + }); }; records.push((lsn, rec)); } } else { for (_, lsn, val) in history.into_iter() { let Value::WalRecord(rec) = val else { - panic!("invalid record") + return Err(anyhow::anyhow!("invalid record, first record is walrecord, expect rest are walrecord")) + .with_context(|| generate_debug_trace( + replay_history_for_debug_ref, + full_history, + retain_lsn_below_horizon, + horizon, + )); }; records.push((lsn, rec)); } @@ -1513,12 +1610,11 @@ impl Timeline { replay_history.push((key, request_lsn, Value::Image(img.clone()))); retention.push(vec![(request_lsn, Value::Image(img))]); } else { - retention.push( - split_for_lsn - .iter() - .map(|(_, lsn, value)| (*lsn, value.clone())) - .collect(), - ); + let deltas = split_for_lsn + .iter() + .map(|(_, lsn, value)| (*lsn, value.clone())) + .collect_vec(); + retention.push(deltas); } } let mut result = Vec::with_capacity(retention.len()); @@ -1533,7 +1629,7 @@ impl Timeline { result.push((lsn_split_points[idx], KeyLogAtLsn(logs))); } } - unreachable!() + unreachable!("key retention is empty") } /// An experimental compaction building block that combines compaction with garbage collection. @@ -1544,11 +1640,26 @@ impl Timeline { /// and create delta layers with all deltas >= gc horizon. pub(crate) async fn compact_with_gc( self: &Arc, - _cancel: &CancellationToken, + cancel: &CancellationToken, ctx: &RequestContext, ) -> anyhow::Result<()> { use std::collections::BTreeSet; + // Block other compaction/GC tasks from running for now. GC-compaction could run along + // with legacy compaction tasks in the future. + + let _compaction_lock = tokio::select! { + guard = self.compaction_lock.lock() => guard, + // TODO: refactor to CompactionError to correctly pass cancelled error + _ = cancel.cancelled() => return Err(anyhow!("cancelled")), + }; + + let _gc = tokio::select! { + guard = self.gc_lock.lock() => guard, + // TODO: refactor to CompactionError to correctly pass cancelled error + _ = cancel.cancelled() => return Err(anyhow!("cancelled")), + }; + info!("running enhanced gc bottom-most compaction"); scopeguard::defer! { @@ -1644,6 +1755,13 @@ impl Timeline { let mut accumulated_values = Vec::new(); let mut last_key: Option = None; + enum FlushDeltaResult { + /// Create a new resident layer + CreateResidentLayer(ResidentLayer), + /// Keep an original delta layer + KeepLayer(PersistentLayerKey), + } + #[allow(clippy::too_many_arguments)] async fn flush_deltas( deltas: &mut Vec<(Key, Lsn, crate::repository::Value)>, @@ -1654,7 +1772,7 @@ impl Timeline { lowest_retain_lsn: Lsn, ctx: &RequestContext, last_batch: bool, - ) -> anyhow::Result> { + ) -> anyhow::Result> { // Check if we need to split the delta layer. We split at the original delta layer boundary to avoid // overlapping layers. // @@ -1677,28 +1795,78 @@ impl Timeline { if !need_split && !last_batch { return Ok(None); } - let deltas = std::mem::take(deltas); + let deltas: Vec<(Key, Lsn, Value)> = std::mem::take(deltas); if deltas.is_empty() { return Ok(None); } let end_lsn = deltas.iter().map(|(_, lsn, _)| lsn).max().copied().unwrap() + 1; + let delta_key = PersistentLayerKey { + key_range: { + let key_start = deltas.first().unwrap().0; + let key_end = deltas.last().unwrap().0.next(); + key_start..key_end + }, + lsn_range: lowest_retain_lsn..end_lsn, + is_delta: true, + }; + { + // Hack: skip delta layer if we need to produce a layer of a same key-lsn. + // + // This can happen if we have removed some deltas in "the middle" of some existing layer's key-lsn-range. + // For example, consider the case where a single delta with range [0x10,0x50) exists. + // And we have branches at LSN 0x10, 0x20, 0x30. + // Then we delete branch @ 0x20. + // Bottom-most compaction may now delete the delta [0x20,0x30). + // And that wouldnt' change the shape of the layer. + // + // Note that bottom-most-gc-compaction never _adds_ new data in that case, only removes. + // That's why it's safe to skip. + let guard = tline.layers.read().await; + + if guard.contains_key(&delta_key) { + let layer_generation = guard.get_from_key(&delta_key).metadata().generation; + drop(guard); + if layer_generation == tline.generation { + // TODO: depending on whether we design this compaction process to run along with + // other compactions, there could be layer map modifications after we drop the + // layer guard, and in case it creates duplicated layer key, we will still error + // in the end. + info!( + key=%delta_key, + ?layer_generation, + "discard delta layer due to duplicated layer in the same generation" + ); + return Ok(Some(FlushDeltaResult::KeepLayer(delta_key))); + } + } + } + let mut delta_layer_writer = DeltaLayerWriter::new( tline.conf, tline.timeline_id, tline.tenant_shard_id, - deltas.first().unwrap().0, + delta_key.key_range.start, lowest_retain_lsn..end_lsn, ctx, ) .await?; - let key_end = deltas.last().unwrap().0.next(); for (key, lsn, val) in deltas { delta_layer_writer.put_value(key, lsn, val, ctx).await?; } - let delta_layer = delta_layer_writer.finish(key_end, tline, ctx).await?; - Ok(Some(delta_layer)) + let delta_layer = delta_layer_writer + .finish(delta_key.key_range.end, tline, ctx) + .await?; + Ok(Some(FlushDeltaResult::CreateResidentLayer(delta_layer))) } + // Hack the key range to be min..(max-1). Otherwise, the image layer will be + // interpreted as an L0 delta layer. + let hack_image_layer_range = { + let mut end_key = Key::MAX; + end_key.field6 -= 1; + Key::MIN..end_key + }; + // Only create image layers when there is no ancestor branches. TODO: create covering image layer // when some condition meet. let mut image_layer_writer = if self.ancestor_timeline.is_none() { @@ -1707,7 +1875,7 @@ impl Timeline { self.conf, self.timeline_id, self.tenant_shard_id, - &(Key::MIN..Key::MAX), // covers the full key range + &hack_image_layer_range, // covers the full key range lowest_retain_lsn, ctx, ) @@ -1737,6 +1905,42 @@ impl Timeline { let img = tline.get(key, tline.ancestor_lsn, ctx).await?; Ok(Some((key, tline.ancestor_lsn, img))) } + let image_layer_key = PersistentLayerKey { + key_range: hack_image_layer_range, + lsn_range: PersistentLayerDesc::image_layer_lsn_range(lowest_retain_lsn), + is_delta: false, + }; + + // Like with delta layers, it can happen that we re-produce an already existing image layer. + // This could happen when a user triggers force compaction and image generation. In this case, + // it's always safe to rewrite the layer. + let discard_image_layer = { + let guard = self.layers.read().await; + if guard.contains_key(&image_layer_key) { + let layer_generation = guard.get_from_key(&image_layer_key).metadata().generation; + drop(guard); + if layer_generation == self.generation { + // TODO: depending on whether we design this compaction process to run along with + // other compactions, there could be layer map modifications after we drop the + // layer guard, and in case it creates duplicated layer key, we will still error + // in the end. + info!( + key=%image_layer_key, + ?layer_generation, + "discard image layer due to duplicated layer key in the same generation", + ); + true + } else { + false + } + } else { + false + } + }; + + // Actually, we can decide not to write to the image layer at all at this point because + // the key and LSN range are determined. However, to keep things simple here, we still + // create this writer, and discard the writer in the end. let mut delta_values = Vec::new(); let delta_split_points = delta_split_points.into_iter().collect_vec(); @@ -1824,7 +2028,9 @@ impl Timeline { ); assert!(delta_values.is_empty(), "unprocessed keys"); - let image_layer = if let Some(writer) = image_layer_writer { + let image_layer = if discard_image_layer { + None + } else if let Some(writer) = image_layer_writer { Some(writer.finish(self, ctx).await?) } else { None @@ -1835,7 +2041,22 @@ impl Timeline { if image_layer.is_some() { 1 } else { 0 } ); let mut compact_to = Vec::new(); - compact_to.extend(delta_layers); + let mut keep_layers = HashSet::new(); + for action in delta_layers { + match action { + FlushDeltaResult::CreateResidentLayer(layer) => { + compact_to.push(layer); + } + FlushDeltaResult::KeepLayer(l) => { + keep_layers.insert(l); + } + } + } + if discard_image_layer { + keep_layers.insert(image_layer_key); + } + let mut layer_selection = layer_selection; + layer_selection.retain(|x| !keep_layers.contains(&x.layer_desc().key())); compact_to.extend(image_layer); // Step 3: Place back to the layer map. { diff --git a/pageserver/src/tenant/timeline/layer_manager.rs b/pageserver/src/tenant/timeline/layer_manager.rs index 1e4edd34ad..1bc2acbd34 100644 --- a/pageserver/src/tenant/timeline/layer_manager.rs +++ b/pageserver/src/tenant/timeline/layer_manager.rs @@ -35,6 +35,10 @@ impl LayerManager { self.layer_fmgr.get_from_desc(desc) } + pub(crate) fn get_from_key(&self, desc: &PersistentLayerKey) -> Layer { + self.layer_fmgr.get_from_key(desc) + } + /// Get an immutable reference to the layer map. /// /// We expect users only to be able to get an immutable layer map. If users want to make modifications, @@ -365,16 +369,20 @@ impl Default for LayerFileManager { } impl LayerFileManager { - fn get_from_desc(&self, desc: &PersistentLayerDesc) -> T { + fn get_from_key(&self, key: &PersistentLayerKey) -> T { // The assumption for the `expect()` is that all code maintains the following invariant: // A layer's descriptor is present in the LayerMap => the LayerFileManager contains a layer for the descriptor. self.0 - .get(&desc.key()) - .with_context(|| format!("get layer from desc: {}", desc.layer_name())) + .get(key) + .with_context(|| format!("get layer from key: {}", key)) .expect("not found") .clone() } + fn get_from_desc(&self, desc: &PersistentLayerDesc) -> T { + self.get_from_key(&desc.key()) + } + fn contains_key(&self, key: &PersistentLayerKey) -> bool { self.0.contains_key(key) } From 4eea3ce70536c3afcb7f7731d2bf7178e7271f41 Mon Sep 17 00:00:00 2001 From: Alexander Bayandin Date: Thu, 1 Aug 2024 16:55:43 +0100 Subject: [PATCH 30/37] test_runner: don't create artifacts if Allure is not enabled (#8580) ## Problem `allure_attach_from_dir` method might create `tar.zst` archives even if `--alluredir` is not set (i.e. Allure results collection is disabled) ## Summary of changes - Don't run `allure_attach_from_dir` if `--alluredir` is not set --- test_runner/fixtures/neon_fixtures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index 0c33dec784..b370a92e38 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -4529,6 +4529,13 @@ def test_output_dir( yield test_dir + # Allure artifacts creation might involve the creation of `.tar.zst` archives, + # which aren't going to be used if Allure results collection is not enabled + # (i.e. --alluredir is not set). + # Skip `allure_attach_from_dir` in this case + if not request.config.getoption("--alluredir"): + return + preserve_database_files = False for k, v in request.node.user_properties: # NB: the neon_env_builder fixture uses this fixture (test_output_dir). From 4631179320decea3e20ed8366d35698ead74bceb Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 2 Aug 2024 08:00:46 +0100 Subject: [PATCH 31/37] pageserver: refine how we delete timelines after shard split (#8436) ## Problem Previously, when we do a timeline deletion, shards will delete layers that belong to an ancestor. That is not a correctness issue, because when we delete a timeline, we're always deleting it from all shards, and destroying data for that timeline is clearly fine. However, there exists a race where one shard might start doing this deletion while another shard has not yet received the deletion request, and might try to access an ancestral layer. This creates ambiguity over the "all layers referenced by my index should always exist" invariant, which is important to detecting and reporting corruption. Now that we have a GC mode for clearing up ancestral layers, we can rely on that to clean up such layers, and avoid deleting them right away. This makes things easier to reason about: there are now no cases where a shard will delete a layer that belongs to a ShardIndex other than itself. ## Summary of changes - Modify behavior of RemoteTimelineClient::delete_all - Add `test_scrubber_physical_gc_timeline_deletion` to exercise this case - Tweak AWS SDK config in the scrubber to enable retries. Motivated by seeing the test for this feature encounter some transient "service error" S3 errors (which are probably nothing to do with the changes in this PR) --- .../src/tenant/remote_timeline_client.rs | 12 +++ storage_scrubber/src/lib.rs | 8 ++ test_runner/regress/test_storage_scrubber.py | 78 ++++++++++++++++++- 3 files changed, 97 insertions(+), 1 deletion(-) diff --git a/pageserver/src/tenant/remote_timeline_client.rs b/pageserver/src/tenant/remote_timeline_client.rs index fed666ca45..9e021c7e35 100644 --- a/pageserver/src/tenant/remote_timeline_client.rs +++ b/pageserver/src/tenant/remote_timeline_client.rs @@ -1378,6 +1378,18 @@ impl RemoteTimelineClient { .dirty .layer_metadata .drain() + .filter(|(_file_name, meta)| { + // Filter out layers that belonged to an ancestor shard. Since we are deleting the whole timeline from + // all shards anyway, we _could_ delete these, but + // - it creates a potential race if other shards are still + // using the layers while this shard deletes them. + // - it means that if we rolled back the shard split, the ancestor shards would be in a state where + // these timelines are present but corrupt (their index exists but some layers don't) + // + // These layers will eventually be cleaned up by the scrubber when it does physical GC. + meta.shard.shard_number == self.tenant_shard_id.shard_number + && meta.shard.shard_count == self.tenant_shard_id.shard_count + }) .map(|(file_name, meta)| { remote_layer_path( &self.tenant_shard_id.tenant_id, diff --git a/storage_scrubber/src/lib.rs b/storage_scrubber/src/lib.rs index 152319b731..1fc94cc174 100644 --- a/storage_scrubber/src/lib.rs +++ b/storage_scrubber/src/lib.rs @@ -16,6 +16,7 @@ use std::sync::Arc; use std::time::Duration; use anyhow::{anyhow, Context}; +use aws_config::retry::{RetryConfigBuilder, RetryMode}; use aws_sdk_s3::config::Region; use aws_sdk_s3::error::DisplayErrorContext; use aws_sdk_s3::Client; @@ -314,8 +315,15 @@ pub fn init_logging(file_name: &str) -> Option { } async fn init_s3_client(bucket_region: Region) -> Client { + let mut retry_config_builder = RetryConfigBuilder::new(); + + retry_config_builder + .set_max_attempts(Some(3)) + .set_mode(Some(RetryMode::Adaptive)); + let config = aws_config::defaults(aws_config::BehaviorVersion::v2024_03_28()) .region(bucket_region) + .retry_config(retry_config_builder.build()) .load() .await; Client::new(&config) diff --git a/test_runner/regress/test_storage_scrubber.py b/test_runner/regress/test_storage_scrubber.py index fadf438788..e3f627b6a6 100644 --- a/test_runner/regress/test_storage_scrubber.py +++ b/test_runner/regress/test_storage_scrubber.py @@ -13,6 +13,7 @@ from fixtures.neon_fixtures import ( NeonEnv, NeonEnvBuilder, ) +from fixtures.pg_version import PgVersion from fixtures.remote_storage import S3Storage, s3_storage from fixtures.utils import wait_until from fixtures.workload import Workload @@ -265,10 +266,85 @@ def test_scrubber_physical_gc_ancestors( # attach it, to drop any local state, then check it's still readable. workload.stop() drop_local_state(env, tenant_id) - workload.validate() +def test_scrubber_physical_gc_timeline_deletion(neon_env_builder: NeonEnvBuilder): + """ + When we delete a timeline after a shard split, the child shards do not directly delete the + layers in the ancestor shards. They rely on the scrubber to clean up. + """ + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.num_pageservers = 2 + + env = neon_env_builder.init_configs() + env.start() + + tenant_id = TenantId.generate() + timeline_id = TimelineId.generate() + env.neon_cli.create_tenant( + tenant_id, + timeline_id, + shard_count=None, + conf={ + # Small layers and low compaction thresholds, so that when we split we can expect some to + # be dropped by child shards + "checkpoint_distance": f"{1024 * 1024}", + "compaction_threshold": "1", + "compaction_target_size": f"{1024 * 1024}", + "image_creation_threshold": "2", + "image_layer_creation_check_threshold": "0", + # Disable background compaction, we will do it explicitly + "compaction_period": "0s", + # No PITR, so that as soon as child shards generate an image layer, it covers ancestor deltas + # and makes them GC'able + "pitr_interval": "0s", + }, + ) + + # Make sure the original shard has some layers + workload = Workload(env, tenant_id, timeline_id) + workload.init() + workload.write_rows(100) + + new_shard_count = 4 + shards = env.storage_controller.tenant_shard_split(tenant_id, shard_count=new_shard_count) + + # Create a second timeline so that when we delete the first one, child shards still have some content in S3. + # + # This is a limitation of the scrubber: if a shard isn't in S3 (because it has no timelines), then the scrubber + # doesn't know about it, and won't perceive its ancestors as ancestors. + other_timeline_id = TimelineId.generate() + env.storage_controller.pageserver_api().timeline_create( + PgVersion.NOT_SET, tenant_id, other_timeline_id + ) + + # Write after split so that child shards have some indices in S3 + workload.write_rows(100, upload=False) + for shard in shards: + ps = env.get_tenant_pageserver(shard) + log.info(f"Waiting for shard {shard} on pageserver {ps.id}") + ps.http_client().timeline_checkpoint( + shard, timeline_id, compact=False, wait_until_uploaded=True + ) + + # The timeline still exists in child shards and they reference its layers, so scrubbing + # now shouldn't delete anything. + gc_summary = env.storage_scrubber.pageserver_physical_gc(min_age_secs=0, mode="full") + assert gc_summary["remote_storage_errors"] == 0 + assert gc_summary["indices_deleted"] == 0 + assert gc_summary["ancestor_layers_deleted"] == 0 + + # Delete the timeline + env.storage_controller.pageserver_api().timeline_delete(tenant_id, timeline_id) + + # Subsequently doing physical GC should clean up the ancestor layers + gc_summary = env.storage_scrubber.pageserver_physical_gc(min_age_secs=0, mode="full") + assert gc_summary["remote_storage_errors"] == 0 + assert gc_summary["indices_deleted"] == 0 + assert gc_summary["ancestor_layers_deleted"] > 0 + + def test_scrubber_physical_gc_ancestors_split(neon_env_builder: NeonEnvBuilder): """ Exercise ancestor GC while a tenant is partly split: this test ensures that if we have some child shards From 3833e30d44f7cd05842f00e0c2b5e308f21371b7 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 2 Aug 2024 09:37:44 +0100 Subject: [PATCH 32/37] storage_controller: start adding chaos hooks (#7946) Chaos injection bridges the gap between automated testing (where we do lots of different things with small, short-lived tenants), and staging (where we do many fewer things, but with larger, long-lived tenants). This PR adds a first type of chaos which isn't really very chaotic: it's live migration of tenants between healthy pageservers. This nevertheless provides continuous checks that things like clean, prompt shutdown of tenants works for realistically deployed pageservers with realistically large tenants. --- Cargo.lock | 1 + storage_controller/Cargo.toml | 1 + storage_controller/src/main.rs | 28 ++++++++ storage_controller/src/service.rs | 2 + .../src/service/chaos_injector.rs | 71 +++++++++++++++++++ 5 files changed, 103 insertions(+) create mode 100644 storage_controller/src/service/chaos_injector.rs diff --git a/Cargo.lock b/Cargo.lock index 2677699702..764c0fbd30 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5703,6 +5703,7 @@ dependencies = [ "pageserver_client", "postgres_connection", "r2d2", + "rand 0.8.5", "reqwest 0.12.4", "routerify", "scopeguard", diff --git a/storage_controller/Cargo.toml b/storage_controller/Cargo.toml index d14b235046..ecaac04915 100644 --- a/storage_controller/Cargo.toml +++ b/storage_controller/Cargo.toml @@ -32,6 +32,7 @@ once_cell.workspace = true pageserver_api.workspace = true pageserver_client.workspace = true postgres_connection.workspace = true +rand.workspace = true reqwest = { workspace = true, features = ["stream"] } routerify.workspace = true serde.workspace = true diff --git a/storage_controller/src/main.rs b/storage_controller/src/main.rs index adbf5c6496..2799f21fdc 100644 --- a/storage_controller/src/main.rs +++ b/storage_controller/src/main.rs @@ -9,12 +9,14 @@ use std::time::Duration; use storage_controller::http::make_router; use storage_controller::metrics::preinitialize_metrics; use storage_controller::persistence::Persistence; +use storage_controller::service::chaos_injector::ChaosInjector; use storage_controller::service::{ Config, Service, MAX_OFFLINE_INTERVAL_DEFAULT, MAX_WARMING_UP_INTERVAL_DEFAULT, RECONCILER_CONCURRENCY_DEFAULT, }; use tokio::signal::unix::SignalKind; use tokio_util::sync::CancellationToken; +use tracing::Instrument; use utils::auth::{JwtAuth, SwappableJwtAuth}; use utils::logging::{self, LogFormat}; @@ -86,6 +88,10 @@ struct Cli { // TODO: make `cfg(feature = "testing")` #[arg(long)] neon_local_repo_dir: Option, + + /// Chaos testing + #[arg(long)] + chaos_interval: Option, } enum StrictMode { @@ -309,6 +315,22 @@ async fn async_main() -> anyhow::Result<()> { tracing::info!("Serving on {0}", args.listen); let server_task = tokio::task::spawn(server); + let chaos_task = args.chaos_interval.map(|interval| { + let service = service.clone(); + let cancel = CancellationToken::new(); + let cancel_bg = cancel.clone(); + ( + tokio::task::spawn( + async move { + let mut chaos_injector = ChaosInjector::new(service, interval.into()); + chaos_injector.run(cancel_bg).await + } + .instrument(tracing::info_span!("chaos_injector")), + ), + cancel, + ) + }); + // Wait until we receive a signal let mut sigint = tokio::signal::unix::signal(SignalKind::interrupt())?; let mut sigquit = tokio::signal::unix::signal(SignalKind::quit())?; @@ -337,6 +359,12 @@ async fn async_main() -> anyhow::Result<()> { } } + // If we were injecting chaos, stop that so that we're not calling into Service while it shuts down + if let Some((chaos_jh, chaos_cancel)) = chaos_task { + chaos_cancel.cancel(); + chaos_jh.await.ok(); + } + service.shutdown().await; tracing::info!("Service shutdown complete"); diff --git a/storage_controller/src/service.rs b/storage_controller/src/service.rs index ea515f67da..6940bf2c64 100644 --- a/storage_controller/src/service.rs +++ b/storage_controller/src/service.rs @@ -84,6 +84,8 @@ use crate::{ }; use serde::{Deserialize, Serialize}; +pub mod chaos_injector; + // For operations that should be quick, like attaching a new tenant const SHORT_RECONCILE_TIMEOUT: Duration = Duration::from_secs(5); diff --git a/storage_controller/src/service/chaos_injector.rs b/storage_controller/src/service/chaos_injector.rs new file mode 100644 index 0000000000..99961d691c --- /dev/null +++ b/storage_controller/src/service/chaos_injector.rs @@ -0,0 +1,71 @@ +use std::{sync::Arc, time::Duration}; + +use rand::seq::SliceRandom; +use rand::thread_rng; +use tokio_util::sync::CancellationToken; + +use super::Service; + +pub struct ChaosInjector { + service: Arc, + interval: Duration, +} + +impl ChaosInjector { + pub fn new(service: Arc, interval: Duration) -> Self { + Self { service, interval } + } + + pub async fn run(&mut self, cancel: CancellationToken) { + let mut interval = tokio::time::interval(self.interval); + + loop { + tokio::select! { + _ = interval.tick() => {} + _ = cancel.cancelled() => { + tracing::info!("Shutting down"); + return; + } + } + + self.inject_chaos().await; + + tracing::info!("Chaos iteration..."); + } + } + + async fn inject_chaos(&mut self) { + // Pick some shards to interfere with + let batch_size = 128; + let mut inner = self.service.inner.write().unwrap(); + let (nodes, tenants, scheduler) = inner.parts_mut(); + let tenant_ids = tenants.keys().cloned().collect::>(); + let victims = tenant_ids.choose_multiple(&mut thread_rng(), batch_size); + + for victim in victims { + let shard = tenants + .get_mut(victim) + .expect("Held lock between choosing ID and this get"); + + // Pick a secondary to promote + let Some(new_location) = shard + .intent + .get_secondary() + .choose(&mut thread_rng()) + .cloned() + else { + tracing::info!("Skipping shard {victim}: no secondary location, can't migrate"); + continue; + }; + + let Some(old_location) = *shard.intent.get_attached() else { + tracing::info!("Skipping shard {victim}: currently has no attached location"); + continue; + }; + + shard.intent.demote_attached(scheduler, old_location); + shard.intent.promote_attached(scheduler, new_location); + self.service.maybe_reconcile_shard(shard, nodes); + } + } +} From 7eb3d6bb2dec03925487e618ae26618f15e87c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arpad=20M=C3=BCller?= Date: Fri, 2 Aug 2024 13:07:12 +0200 Subject: [PATCH 33/37] Wait for completion of the upload queue in flush_frozen_layer (#8550) Makes `flush_frozen_layer` add a barrier to the upload queue and makes it wait for that barrier to be reached until it lets the flushing be completed. This gives us backpressure and ensures that writes can't build up in an unbounded fashion. Fixes #7317 --- compute_tools/Cargo.toml | 5 ++ compute_tools/src/compute.rs | 10 ++- control_plane/src/background_process.rs | 2 +- pageserver/src/tenant/timeline.rs | 20 ++++- test_runner/fixtures/neon_fixtures.py | 12 ++- test_runner/fixtures/pageserver/http.py | 2 + test_runner/regress/test_branching.py | 23 ++++-- test_runner/regress/test_remote_storage.py | 87 +++------------------- 8 files changed, 74 insertions(+), 87 deletions(-) diff --git a/compute_tools/Cargo.toml b/compute_tools/Cargo.toml index 8ceb8f2ad2..8af0ed43ce 100644 --- a/compute_tools/Cargo.toml +++ b/compute_tools/Cargo.toml @@ -4,6 +4,11 @@ version = "0.1.0" edition.workspace = true license.workspace = true +[features] +default = [] +# Enables test specific features. +testing = [] + [dependencies] anyhow.workspace = true async-compression.workspace = true diff --git a/compute_tools/src/compute.rs b/compute_tools/src/compute.rs index 91855d954d..5bd6897fe3 100644 --- a/compute_tools/src/compute.rs +++ b/compute_tools/src/compute.rs @@ -400,7 +400,15 @@ impl ComputeNode { pub fn get_basebackup(&self, compute_state: &ComputeState, lsn: Lsn) -> Result<()> { let mut retry_period_ms = 500.0; let mut attempts = 0; - let max_attempts = 10; + const DEFAULT_ATTEMPTS: u16 = 10; + #[cfg(feature = "testing")] + let max_attempts = if let Ok(v) = env::var("NEON_COMPUTE_TESTING_BASEBACKUP_RETRIES") { + u16::from_str(&v).unwrap() + } else { + DEFAULT_ATTEMPTS + }; + #[cfg(not(feature = "testing"))] + let max_attempts = DEFAULT_ATTEMPTS; loop { let result = self.try_get_basebackup(compute_state, lsn); match result { diff --git a/control_plane/src/background_process.rs b/control_plane/src/background_process.rs index a272c306e7..bf8a27e550 100644 --- a/control_plane/src/background_process.rs +++ b/control_plane/src/background_process.rs @@ -289,7 +289,7 @@ fn fill_remote_storage_secrets_vars(mut cmd: &mut Command) -> &mut Command { fn fill_env_vars_prefixed_neon(mut cmd: &mut Command) -> &mut Command { for (var, val) in std::env::vars() { - if var.starts_with("NEON_PAGESERVER_") { + if var.starts_with("NEON_") { cmd = cmd.env(var, val); } } diff --git a/pageserver/src/tenant/timeline.rs b/pageserver/src/tenant/timeline.rs index 37ebeded66..be72e15c19 100644 --- a/pageserver/src/tenant/timeline.rs +++ b/pageserver/src/tenant/timeline.rs @@ -143,7 +143,10 @@ use self::walreceiver::{WalReceiver, WalReceiverConf}; use super::{config::TenantConf, upload_queue::NotInitialized}; use super::{debug_assert_current_span_has_tenant_and_timeline_id, AttachedTenantConf}; use super::{remote_timeline_client::index::IndexPart, storage_layer::LayerFringe}; -use super::{remote_timeline_client::RemoteTimelineClient, storage_layer::ReadableLayer}; +use super::{ + remote_timeline_client::RemoteTimelineClient, remote_timeline_client::WaitCompletionError, + storage_layer::ReadableLayer, +}; use super::{ secondary::heatmap::{HeatMapLayer, HeatMapTimeline}, GcError, @@ -4089,6 +4092,21 @@ impl Timeline { // release lock on 'layers' }; + // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files. + // This makes us refuse ingest until the new layers have been persisted to the remote. + self.remote_client + .wait_completion() + .await + .map_err(|e| match e { + WaitCompletionError::UploadQueueShutDownOrStopped + | WaitCompletionError::NotInitialized( + NotInitialized::ShuttingDown | NotInitialized::Stopped, + ) => FlushLayerError::Cancelled, + WaitCompletionError::NotInitialized(NotInitialized::Uninitialized) => { + FlushLayerError::Other(anyhow!(e).into()) + } + })?; + // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`, // a compaction can delete the file and then it won't be available for uploads any more. // We still schedule the upload, resulting in an error, but ideally we'd somehow avoid this diff --git a/test_runner/fixtures/neon_fixtures.py b/test_runner/fixtures/neon_fixtures.py index b370a92e38..7289472de2 100644 --- a/test_runner/fixtures/neon_fixtures.py +++ b/test_runner/fixtures/neon_fixtures.py @@ -1943,11 +1943,15 @@ class NeonCli(AbstractNeonCli): remote_ext_config: Optional[str] = None, pageserver_id: Optional[int] = None, allow_multiple=False, + basebackup_request_tries: Optional[int] = None, ) -> "subprocess.CompletedProcess[str]": args = [ "endpoint", "start", ] + extra_env_vars = {} + if basebackup_request_tries is not None: + extra_env_vars["NEON_COMPUTE_TESTING_BASEBACKUP_TRIES"] = str(basebackup_request_tries) if remote_ext_config is not None: args.extend(["--remote-ext-config", remote_ext_config]) @@ -1960,7 +1964,7 @@ class NeonCli(AbstractNeonCli): if allow_multiple: args.extend(["--allow-multiple"]) - res = self.raw_cli(args) + res = self.raw_cli(args, extra_env_vars) res.check_returncode() return res @@ -3812,6 +3816,7 @@ class Endpoint(PgProtocol, LogUtils): pageserver_id: Optional[int] = None, safekeepers: Optional[List[int]] = None, allow_multiple: bool = False, + basebackup_request_tries: Optional[int] = None, ) -> "Endpoint": """ Start the Postgres instance. @@ -3833,6 +3838,7 @@ class Endpoint(PgProtocol, LogUtils): remote_ext_config=remote_ext_config, pageserver_id=pageserver_id, allow_multiple=allow_multiple, + basebackup_request_tries=basebackup_request_tries, ) self._running.release(1) @@ -3979,6 +3985,7 @@ class Endpoint(PgProtocol, LogUtils): remote_ext_config: Optional[str] = None, pageserver_id: Optional[int] = None, allow_multiple=False, + basebackup_request_tries: Optional[int] = None, ) -> "Endpoint": """ Create an endpoint, apply config, and start Postgres. @@ -3999,6 +4006,7 @@ class Endpoint(PgProtocol, LogUtils): remote_ext_config=remote_ext_config, pageserver_id=pageserver_id, allow_multiple=allow_multiple, + basebackup_request_tries=basebackup_request_tries, ) log.info(f"Postgres startup took {time.time() - started_at} seconds") @@ -4042,6 +4050,7 @@ class EndpointFactory: config_lines: Optional[List[str]] = None, remote_ext_config: Optional[str] = None, pageserver_id: Optional[int] = None, + basebackup_request_tries: Optional[int] = None, ) -> Endpoint: ep = Endpoint( self.env, @@ -4060,6 +4069,7 @@ class EndpointFactory: lsn=lsn, remote_ext_config=remote_ext_config, pageserver_id=pageserver_id, + basebackup_request_tries=basebackup_request_tries, ) def create( diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index c6df6b5baf..192324f086 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -663,6 +663,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): force_image_layer_creation=False, wait_until_uploaded=False, compact: Optional[bool] = None, + **kwargs, ): self.is_testing_enabled_or_skip() query = {} @@ -680,6 +681,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter): res = self.put( f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint", params=query, + **kwargs, ) log.info(f"Got checkpoint request response code: {res.status_code}") self.verbose_error(res) diff --git a/test_runner/regress/test_branching.py b/test_runner/regress/test_branching.py index 190b624a54..fc74707639 100644 --- a/test_runner/regress/test_branching.py +++ b/test_runner/regress/test_branching.py @@ -18,7 +18,6 @@ from fixtures.pageserver.utils import wait_until_tenant_active from fixtures.utils import query_scalar from performance.test_perf_pgbench import get_scales_matrix from requests import RequestException -from requests.exceptions import RetryError # Test branch creation @@ -151,7 +150,7 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.pageserver.allowed_errors.extend( [ ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*", - ".*page_service_conn_main.*: query handler for 'basebackup .* is not active, state: Loading", + ".*page_service_conn_main.*: query handler for 'basebackup .* ERROR: Not found: Timeline", ] ) ps_http = env.pageserver.http_client() @@ -176,10 +175,12 @@ def test_cannot_create_endpoint_on_non_uploaded_timeline(neon_env_builder: NeonE env.neon_cli.map_branch(initial_branch, env.initial_tenant, env.initial_timeline) - with pytest.raises(RuntimeError, match="is not active, state: Loading"): - env.endpoints.create_start(initial_branch, tenant_id=env.initial_tenant) + with pytest.raises(RuntimeError, match="ERROR: Not found: Timeline"): + env.endpoints.create_start( + initial_branch, tenant_id=env.initial_tenant, basebackup_request_tries=2 + ) + ps_http.configure_failpoints(("before-upload-index-pausable", "off")) finally: - # FIXME: paused uploads bother shutdown env.pageserver.stop(immediate=True) t.join() @@ -193,8 +194,11 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder env = neon_env_builder.init_configs() env.start() - env.pageserver.allowed_errors.append( - ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*" + env.pageserver.allowed_errors.extend( + [ + ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: request was dropped before completing.*", + ".*request{method=POST path=/v1/tenant/.*/timeline request_id=.*}: .*Cannot branch off the timeline that's not present in pageserver.*", + ] ) ps_http = env.pageserver.http_client() @@ -216,7 +220,10 @@ def test_cannot_branch_from_non_uploaded_branch(neon_env_builder: NeonEnvBuilder branch_id = TimelineId.generate() - with pytest.raises(RetryError, match="too many 503 error responses"): + with pytest.raises( + PageserverApiException, + match="Cannot branch off the timeline that's not present in pageserver", + ): ps_http.timeline_create( env.pg_version, env.initial_tenant, diff --git a/test_runner/regress/test_remote_storage.py b/test_runner/regress/test_remote_storage.py index 09f941f582..2e5260ca78 100644 --- a/test_runner/regress/test_remote_storage.py +++ b/test_runner/regress/test_remote_storage.py @@ -12,7 +12,6 @@ from fixtures.neon_fixtures import ( NeonEnvBuilder, wait_for_last_flush_lsn, ) -from fixtures.pageserver.common_types import parse_layer_file_name from fixtures.pageserver.http import PageserverApiException, PageserverHttpClient from fixtures.pageserver.utils import ( timeline_delete_wait_completed, @@ -313,6 +312,7 @@ def test_remote_storage_upload_queue_retries( def churn_while_failpoints_active(result): overwrite_data_and_wait_for_it_to_arrive_at_pageserver("c") + # this call will wait for the failpoints to be turned off client.timeline_checkpoint(tenant_id, timeline_id) client.timeline_compact(tenant_id, timeline_id) overwrite_data_and_wait_for_it_to_arrive_at_pageserver("d") @@ -332,8 +332,8 @@ def test_remote_storage_upload_queue_retries( # Exponential back-off in upload queue, so, gracious timeouts. wait_until(30, 1, lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="upload"), 0)) - wait_until(30, 1, lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 2)) - wait_until(30, 1, lambda: assert_gt(get_queued_count(file_kind="layer", op_kind="delete"), 0)) + wait_until(30, 1, lambda: assert_ge(get_queued_count(file_kind="index", op_kind="upload"), 1)) + wait_until(30, 1, lambda: assert_eq(get_queued_count(file_kind="layer", op_kind="delete"), 0)) # unblock churn operations configure_storage_sync_failpoints("off") @@ -769,11 +769,11 @@ def test_empty_branch_remote_storage_upload_on_restart(neon_env_builder: NeonEnv create_thread.join() -def test_compaction_waits_for_upload( +def test_paused_upload_stalls_checkpoint( neon_env_builder: NeonEnvBuilder, ): """ - This test forces a race between upload and compaction. + This test checks that checkpoints block on uploads to remote storage. """ neon_env_builder.enable_pageserver_remote_storage(RemoteStorageKind.LOCAL_FS) @@ -788,6 +788,10 @@ def test_compaction_waits_for_upload( } ) + env.pageserver.allowed_errors.append( + f".*PUT.* path=/v1/tenant/{env.initial_tenant}/timeline.* request was dropped before completing" + ) + tenant_id = env.initial_tenant timeline_id = env.initial_timeline @@ -808,76 +812,9 @@ def test_compaction_waits_for_upload( endpoint.safe_psql("CREATE TABLE foo AS SELECT x FROM generate_series(1, 10000) g(x)") wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) - client.timeline_checkpoint(tenant_id, timeline_id) - deltas_at_first = len(client.layer_map_info(tenant_id, timeline_id).delta_layers()) - assert ( - deltas_at_first == 2 - ), "are you fixing #5863? just add one more checkpoint after 'CREATE TABLE bar ...' statement." - - endpoint.safe_psql("CREATE TABLE bar AS SELECT x FROM generate_series(1, 10000) g(x)") - endpoint.safe_psql("UPDATE foo SET x = 0 WHERE x = 1") - wait_for_last_flush_lsn(env, endpoint, tenant_id, timeline_id) - - layers_before_last_checkpoint = client.layer_map_info(tenant_id, timeline_id).historic_by_name() - upload_stuck_layers = layers_before_last_checkpoint - layers_at_creation.historic_by_name() - - assert len(upload_stuck_layers) > 0 - - for name in upload_stuck_layers: - assert env.pageserver.layer_exists( - tenant_id, timeline_id, parse_layer_file_name(name) - ), "while uploads are stuck the layers should be present on disk" - - # now this will do the L0 => L1 compaction and want to remove - # upload_stuck_layers and the original initdb L0 - client.timeline_checkpoint(tenant_id, timeline_id) - - # as uploads are paused, the upload_stuck_layers should still be with us - for name in upload_stuck_layers: - assert env.pageserver.layer_exists( - tenant_id, timeline_id, parse_layer_file_name(name) - ), "uploads are stuck still over compaction" - - compacted_layers = client.layer_map_info(tenant_id, timeline_id).historic_by_name() - overlap = compacted_layers.intersection(upload_stuck_layers) - assert len(overlap) == 0, "none of the L0's should remain after L0 => L1 compaction" - assert ( - len(compacted_layers) == 1 - ), "there should be one L1 after L0 => L1 compaction (without #5863 being fixed)" - - def layer_deletes_completed(): - m = client.get_metric_value("pageserver_layer_completed_deletes_total") - if m is None: - return 0 - return int(m) - - # if initdb created an initial delta layer, it might already be gc'd - # because it was uploaded before the failpoint was enabled. however, the - # deletion is not guaranteed to be complete. - assert layer_deletes_completed() <= 1 - - client.configure_failpoints(("before-upload-layer-pausable", "off")) - - # Ensure that this actually terminates - wait_upload_queue_empty(client, tenant_id, timeline_id) - - def until_layer_deletes_completed(): - deletes = layer_deletes_completed() - log.info(f"layer_deletes: {deletes}") - # ensure that initdb delta layer AND the previously stuck are now deleted - assert deletes >= len(upload_stuck_layers) + 1 - - wait_until(10, 1, until_layer_deletes_completed) - - for name in upload_stuck_layers: - assert not env.pageserver.layer_exists( - tenant_id, timeline_id, parse_layer_file_name(name) - ), "l0 should now be removed because of L0 => L1 compaction and completed uploads" - - # We should not have hit the error handling path in uploads where a uploaded file is gone - assert not env.pageserver.log_contains( - "File to upload doesn't exist. Likely the file has been deleted and an upload is not required any more." - ) + with pytest.raises(ReadTimeout): + client.timeline_checkpoint(tenant_id, timeline_id, timeout=5) + client.configure_failpoints(("before-upload-layer-pausable", "off")) def wait_upload_queue_empty( From 1758c10dec4fbd361febbfc9f25d1c7c2b3bc892 Mon Sep 17 00:00:00 2001 From: Arthur Petukhovsky Date: Fri, 2 Aug 2024 15:26:46 +0100 Subject: [PATCH 34/37] Improve safekeepers eviction rate limiting (#8456) This commit tries to fix regular load spikes on staging, caused by too many eviction and partial upload operations running at the same time. Usually it was hapenning after restart, for partial backup the load was delayed. - Add a semaphore for evictions (2 permits by default) - Rename `resident_since` to `evict_not_before` and smooth out the curve by using random duration - Use random duration in partial uploads as well related to https://github.com/neondatabase/neon/issues/6338 some discussion in https://neondb.slack.com/archives/C033RQ5SPDH/p1720601531744029 --- safekeeper/src/lib.rs | 2 ++ safekeeper/src/rate_limit.rs | 49 ++++++++++++++++++++++++++ safekeeper/src/timeline.rs | 3 +- safekeeper/src/timeline_eviction.rs | 6 ++-- safekeeper/src/timeline_manager.rs | 48 ++++++++++++++++++------- safekeeper/src/timelines_global_map.rs | 14 +++++--- safekeeper/src/wal_backup_partial.rs | 39 +++++++------------- 7 files changed, 112 insertions(+), 49 deletions(-) create mode 100644 safekeeper/src/rate_limit.rs diff --git a/safekeeper/src/lib.rs b/safekeeper/src/lib.rs index 8f2920ada3..56d61e8287 100644 --- a/safekeeper/src/lib.rs +++ b/safekeeper/src/lib.rs @@ -21,6 +21,7 @@ pub mod json_ctrl; pub mod metrics; pub mod patch_control_file; pub mod pull_timeline; +pub mod rate_limit; pub mod receive_wal; pub mod recovery; pub mod remove_wal; @@ -53,6 +54,7 @@ pub mod defaults { pub const DEFAULT_PARTIAL_BACKUP_TIMEOUT: &str = "15m"; pub const DEFAULT_CONTROL_FILE_SAVE_INTERVAL: &str = "300s"; pub const DEFAULT_PARTIAL_BACKUP_CONCURRENCY: &str = "5"; + pub const DEFAULT_EVICTION_CONCURRENCY: usize = 2; // By default, our required residency before eviction is the same as the period that passes // before uploading a partial segment, so that in normal operation the eviction can happen diff --git a/safekeeper/src/rate_limit.rs b/safekeeper/src/rate_limit.rs new file mode 100644 index 0000000000..72373b5786 --- /dev/null +++ b/safekeeper/src/rate_limit.rs @@ -0,0 +1,49 @@ +use std::sync::Arc; + +use rand::Rng; + +use crate::metrics::MISC_OPERATION_SECONDS; + +/// Global rate limiter for background tasks. +#[derive(Clone)] +pub struct RateLimiter { + partial_backup: Arc, + eviction: Arc, +} + +impl RateLimiter { + /// Create a new rate limiter. + /// - `partial_backup_max`: maximum number of concurrent partial backups. + /// - `eviction_max`: maximum number of concurrent timeline evictions. + pub fn new(partial_backup_max: usize, eviction_max: usize) -> Self { + Self { + partial_backup: Arc::new(tokio::sync::Semaphore::new(partial_backup_max)), + eviction: Arc::new(tokio::sync::Semaphore::new(eviction_max)), + } + } + + /// Get a permit for partial backup. This will block if the maximum number of concurrent + /// partial backups is reached. + pub async fn acquire_partial_backup(&self) -> tokio::sync::OwnedSemaphorePermit { + let _timer = MISC_OPERATION_SECONDS + .with_label_values(&["partial_permit_acquire"]) + .start_timer(); + self.partial_backup + .clone() + .acquire_owned() + .await + .expect("semaphore is closed") + } + + /// Try to get a permit for timeline eviction. This will return None if the maximum number of + /// concurrent timeline evictions is reached. + pub fn try_acquire_eviction(&self) -> Option { + self.eviction.clone().try_acquire_owned().ok() + } +} + +/// Generate a random duration that is a fraction of the given duration. +pub fn rand_duration(duration: &std::time::Duration) -> std::time::Duration { + let randf64 = rand::thread_rng().gen_range(0.0..1.0); + duration.mul_f64(randf64) +} diff --git a/safekeeper/src/timeline.rs b/safekeeper/src/timeline.rs index 132e5ec32f..57935d879f 100644 --- a/safekeeper/src/timeline.rs +++ b/safekeeper/src/timeline.rs @@ -25,6 +25,7 @@ use utils::{ use storage_broker::proto::SafekeeperTimelineInfo; use storage_broker::proto::TenantTimelineId as ProtoTenantTimelineId; +use crate::rate_limit::RateLimiter; use crate::receive_wal::WalReceivers; use crate::safekeeper::{ AcceptorProposerMessage, ProposerAcceptorMessage, SafeKeeper, ServerInfo, Term, TermLsn, @@ -36,7 +37,7 @@ use crate::timeline_guard::ResidenceGuard; use crate::timeline_manager::{AtomicStatus, ManagerCtl}; use crate::timelines_set::TimelinesSet; use crate::wal_backup::{self}; -use crate::wal_backup_partial::{PartialRemoteSegment, RateLimiter}; +use crate::wal_backup_partial::PartialRemoteSegment; use crate::{control_file, safekeeper::UNKNOWN_SERVER_VERSION}; use crate::metrics::{FullTimelineInfo, WalStorageMetrics, MISC_OPERATION_SECONDS}; diff --git a/safekeeper/src/timeline_eviction.rs b/safekeeper/src/timeline_eviction.rs index 7947d83eb4..ae6f3f4b7e 100644 --- a/safekeeper/src/timeline_eviction.rs +++ b/safekeeper/src/timeline_eviction.rs @@ -5,7 +5,6 @@ use anyhow::Context; use camino::Utf8PathBuf; use remote_storage::RemotePath; -use std::time::Instant; use tokio::{ fs::File, io::{AsyncRead, AsyncWriteExt}, @@ -15,6 +14,7 @@ use utils::crashsafe::durable_rename; use crate::{ metrics::{EvictionEvent, EVICTION_EVENTS_COMPLETED, EVICTION_EVENTS_STARTED}, + rate_limit::rand_duration, timeline_manager::{Manager, StateSnapshot}, wal_backup, wal_backup_partial::{self, PartialRemoteSegment}, @@ -50,7 +50,6 @@ impl Manager { .flush_lsn .segment_number(self.wal_seg_size) == self.last_removed_segno + 1 - && self.resident_since.elapsed() >= self.conf.eviction_min_resident } /// Evict the timeline to remote storage. @@ -112,7 +111,8 @@ impl Manager { return; } - self.resident_since = Instant::now(); + self.evict_not_before = + tokio::time::Instant::now() + rand_duration(&self.conf.eviction_min_resident); info!("successfully restored evicted timeline"); } diff --git a/safekeeper/src/timeline_manager.rs b/safekeeper/src/timeline_manager.rs index debf8c824f..c224dcd398 100644 --- a/safekeeper/src/timeline_manager.rs +++ b/safekeeper/src/timeline_manager.rs @@ -23,6 +23,7 @@ use utils::lsn::Lsn; use crate::{ control_file::{FileStorage, Storage}, metrics::{MANAGER_ACTIVE_CHANGES, MANAGER_ITERATIONS_TOTAL, MISC_OPERATION_SECONDS}, + rate_limit::{rand_duration, RateLimiter}, recovery::recovery_main, remove_wal::calc_horizon_lsn, safekeeper::Term, @@ -32,7 +33,7 @@ use crate::{ timeline_guard::{AccessService, GuardId, ResidenceGuard}, timelines_set::{TimelineSetGuard, TimelinesSet}, wal_backup::{self, WalBackupTaskHandle}, - wal_backup_partial::{self, PartialRemoteSegment, RateLimiter}, + wal_backup_partial::{self, PartialRemoteSegment}, SafeKeeperConf, }; @@ -185,11 +186,11 @@ pub(crate) struct Manager { // misc pub(crate) access_service: AccessService, - pub(crate) partial_backup_rate_limiter: RateLimiter, + pub(crate) global_rate_limiter: RateLimiter, // Anti-flapping state: we evict timelines eagerly if they are inactive, but should not // evict them if they go inactive very soon after being restored. - pub(crate) resident_since: std::time::Instant, + pub(crate) evict_not_before: Instant, } /// This task gets spawned alongside each timeline and is responsible for managing the timeline's @@ -202,7 +203,7 @@ pub async fn main_task( broker_active_set: Arc, manager_tx: tokio::sync::mpsc::UnboundedSender, mut manager_rx: tokio::sync::mpsc::UnboundedReceiver, - partial_backup_rate_limiter: RateLimiter, + global_rate_limiter: RateLimiter, ) { tli.set_status(Status::Started); @@ -220,7 +221,7 @@ pub async fn main_task( conf, broker_active_set, manager_tx, - partial_backup_rate_limiter, + global_rate_limiter, ) .await; @@ -254,9 +255,29 @@ pub async fn main_task( mgr.set_status(Status::UpdatePartialBackup); mgr.update_partial_backup(&state_snapshot).await; - if mgr.conf.enable_offload && mgr.ready_for_eviction(&next_event, &state_snapshot) { - mgr.set_status(Status::EvictTimeline); - mgr.evict_timeline().await; + let now = Instant::now(); + if mgr.evict_not_before > now { + // we should wait until evict_not_before + update_next_event(&mut next_event, mgr.evict_not_before); + } + + if mgr.conf.enable_offload + && mgr.evict_not_before <= now + && mgr.ready_for_eviction(&next_event, &state_snapshot) + { + // check rate limiter and evict timeline if possible + match mgr.global_rate_limiter.try_acquire_eviction() { + Some(_permit) => { + mgr.set_status(Status::EvictTimeline); + mgr.evict_timeline().await; + } + None => { + // we can't evict timeline now, will try again later + mgr.evict_not_before = + Instant::now() + rand_duration(&mgr.conf.eviction_min_resident); + update_next_event(&mut next_event, mgr.evict_not_before); + } + } } } @@ -334,11 +355,10 @@ impl Manager { conf: SafeKeeperConf, broker_active_set: Arc, manager_tx: tokio::sync::mpsc::UnboundedSender, - partial_backup_rate_limiter: RateLimiter, + global_rate_limiter: RateLimiter, ) -> Manager { let (is_offloaded, partial_backup_uploaded) = tli.bootstrap_mgr().await; Manager { - conf, wal_seg_size: tli.get_wal_seg_size().await, walsenders: tli.get_walsenders().clone(), state_version_rx: tli.get_state_version_rx(), @@ -353,8 +373,10 @@ impl Manager { partial_backup_uploaded, access_service: AccessService::new(manager_tx), tli, - partial_backup_rate_limiter, - resident_since: std::time::Instant::now(), + global_rate_limiter, + // to smooth out evictions spike after restart + evict_not_before: Instant::now() + rand_duration(&conf.eviction_min_resident), + conf, } } @@ -541,7 +563,7 @@ impl Manager { self.partial_backup_task = Some(tokio::spawn(wal_backup_partial::main_task( self.wal_resident_timeline(), self.conf.clone(), - self.partial_backup_rate_limiter.clone(), + self.global_rate_limiter.clone(), ))); } diff --git a/safekeeper/src/timelines_global_map.rs b/safekeeper/src/timelines_global_map.rs index f57da5c7cb..6662e18817 100644 --- a/safekeeper/src/timelines_global_map.rs +++ b/safekeeper/src/timelines_global_map.rs @@ -2,10 +2,11 @@ //! All timelines should always be present in this map, this is done by loading them //! all from the disk on startup and keeping them in memory. +use crate::defaults::DEFAULT_EVICTION_CONCURRENCY; +use crate::rate_limit::RateLimiter; use crate::safekeeper::ServerInfo; use crate::timeline::{get_tenant_dir, get_timeline_dir, Timeline, TimelineError}; use crate::timelines_set::TimelinesSet; -use crate::wal_backup_partial::RateLimiter; use crate::SafeKeeperConf; use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; @@ -31,7 +32,7 @@ struct GlobalTimelinesState { conf: Option, broker_active_set: Arc, load_lock: Arc>, - partial_backup_rate_limiter: RateLimiter, + global_rate_limiter: RateLimiter, } // Used to prevent concurrent timeline loading. @@ -50,7 +51,7 @@ impl GlobalTimelinesState { ( self.get_conf().clone(), self.broker_active_set.clone(), - self.partial_backup_rate_limiter.clone(), + self.global_rate_limiter.clone(), ) } @@ -85,7 +86,7 @@ static TIMELINES_STATE: Lazy> = Lazy::new(|| { conf: None, broker_active_set: Arc::new(TimelinesSet::default()), load_lock: Arc::new(tokio::sync::Mutex::new(TimelineLoadLock)), - partial_backup_rate_limiter: RateLimiter::new(1), + global_rate_limiter: RateLimiter::new(1, 1), }) }); @@ -99,7 +100,10 @@ impl GlobalTimelines { // lock, so use explicit block let tenants_dir = { let mut state = TIMELINES_STATE.lock().unwrap(); - state.partial_backup_rate_limiter = RateLimiter::new(conf.partial_backup_concurrency); + state.global_rate_limiter = RateLimiter::new( + conf.partial_backup_concurrency, + DEFAULT_EVICTION_CONCURRENCY, + ); state.conf = Some(conf); // Iterate through all directories and load tenants for all directories diff --git a/safekeeper/src/wal_backup_partial.rs b/safekeeper/src/wal_backup_partial.rs index b1efa9749f..52765b0e98 100644 --- a/safekeeper/src/wal_backup_partial.rs +++ b/safekeeper/src/wal_backup_partial.rs @@ -18,8 +18,6 @@ //! This way control file stores information about all potentially existing //! remote partial segments and can clean them up after uploading a newer version. -use std::sync::Arc; - use camino::Utf8PathBuf; use postgres_ffi::{XLogFileName, XLogSegNo, PG_TLI}; use remote_storage::RemotePath; @@ -30,6 +28,7 @@ use utils::lsn::Lsn; use crate::{ metrics::{MISC_OPERATION_SECONDS, PARTIAL_BACKUP_UPLOADED_BYTES, PARTIAL_BACKUP_UPLOADS}, + rate_limit::{rand_duration, RateLimiter}, safekeeper::Term, timeline::WalResidentTimeline, timeline_manager::StateSnapshot, @@ -37,30 +36,6 @@ use crate::{ SafeKeeperConf, }; -#[derive(Clone)] -pub struct RateLimiter { - semaphore: Arc, -} - -impl RateLimiter { - pub fn new(permits: usize) -> Self { - Self { - semaphore: Arc::new(tokio::sync::Semaphore::new(permits)), - } - } - - async fn acquire_owned(&self) -> tokio::sync::OwnedSemaphorePermit { - let _timer = MISC_OPERATION_SECONDS - .with_label_values(&["partial_permit_acquire"]) - .start_timer(); - self.semaphore - .clone() - .acquire_owned() - .await - .expect("semaphore is closed") - } -} - #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub enum UploadStatus { /// Upload is in progress. This status should be used only for garbage collection, @@ -352,6 +327,7 @@ pub async fn main_task( ) -> Option { debug!("started"); let await_duration = conf.partial_backup_timeout; + let mut first_iteration = true; let (_, persistent_state) = tli.get_state().await; let mut commit_lsn_rx = tli.get_commit_lsn_watch_rx(); @@ -419,6 +395,15 @@ pub async fn main_task( } } + // smoothing the load after restart, by sleeping for a random time. + // if this is not the first iteration, we will wait for the full await_duration + let await_duration = if first_iteration { + first_iteration = false; + rand_duration(&await_duration) + } else { + await_duration + }; + // fixing the segno and waiting some time to prevent reuploading the same segment too often let pending_segno = backup.segno(flush_lsn_rx.borrow().lsn); let timeout = tokio::time::sleep(await_duration); @@ -454,7 +439,7 @@ pub async fn main_task( } // limit concurrent uploads - let _upload_permit = limiter.acquire_owned().await; + let _upload_permit = limiter.acquire_partial_backup().await; let prepared = backup.prepare_upload().await; if let Some(seg) = &uploaded_segment { From 3a10bf8c823f7b93405454e0befd73f1cc81f140 Mon Sep 17 00:00:00 2001 From: John Spray Date: Fri, 2 Aug 2024 18:28:23 +0100 Subject: [PATCH 35/37] tests: add test_historic_storage_formats (#8423) ## Problem Currently, our backward compatibility tests only look one release back. That means, for example, that when we switch on image layer compression by default, we'll test reading of uncompressed layers for one release, and then stop doing it. When we make an index_part.json format change, we'll test against the old format for a week, then stop (unless we write separate unit tests for each old format). The reality in the field is that data in old formats will continue to exist for weeks/months/years. When we make major format changes, we should retain examples of the old format data, and continuously verify that the latest code can still read them. This test uses contents from a new path in the public S3 bucket, `compatibility-data-snapshots/`. It is populated by hand. The first important artifact is one from before we switch on compression, so that we will keep testing reads of uncompressed data. We will generate more artifacts ahead of other key changes, like when we update remote storage format for archival timelines. Closes: https://github.com/neondatabase/cloud/issues/15576 --- test_runner/regress/test_compatibility.py | 142 ++++++++++++++++++++-- 1 file changed, 135 insertions(+), 7 deletions(-) diff --git a/test_runner/regress/test_compatibility.py b/test_runner/regress/test_compatibility.py index 411b20b2c4..137b0e931d 100644 --- a/test_runner/regress/test_compatibility.py +++ b/test_runner/regress/test_compatibility.py @@ -3,18 +3,15 @@ import re import shutil import subprocess import tempfile +from dataclasses import dataclass from pathlib import Path from typing import List, Optional import pytest import toml -from fixtures.common_types import Lsn +from fixtures.common_types import Lsn, TenantId, TimelineId from fixtures.log_helper import log -from fixtures.neon_fixtures import ( - NeonEnv, - NeonEnvBuilder, - PgBin, -) +from fixtures.neon_fixtures import NeonEnv, NeonEnvBuilder, PgBin from fixtures.pageserver.http import PageserverApiException from fixtures.pageserver.utils import ( timeline_delete_wait_completed, @@ -22,7 +19,8 @@ from fixtures.pageserver.utils import ( wait_for_upload, ) from fixtures.pg_version import PgVersion -from fixtures.remote_storage import RemoteStorageKind +from fixtures.remote_storage import RemoteStorageKind, S3Storage, s3_storage +from fixtures.workload import Workload # # A test suite that help to prevent unintentionally breaking backward or forward compatibility between Neon releases. @@ -409,3 +407,133 @@ def dump_differs( break return differs + + +@dataclass +class HistoricDataSet: + name: str + tenant_id: TenantId + pg_version: PgVersion + url: str + + def __str__(self): + return self.name + + +HISTORIC_DATA_SETS = [ + # From before we enabled image layer compression. + # - IndexPart::LATEST_VERSION 7 + # - STORAGE_FORMAT_VERSION 3 + HistoricDataSet( + "2024-07-18", + TenantId("17bf64a53509714687664b3a84e9b3ba"), + PgVersion.V16, + "https://neon-github-public-dev.s3.eu-central-1.amazonaws.com/compatibility-data-snapshots/2024-07-18-pgv16.tar.zst", + ), +] + + +@pytest.mark.parametrize("dataset", HISTORIC_DATA_SETS) +@pytest.mark.xdist_group("compatibility") +def test_historic_storage_formats( + neon_env_builder: NeonEnvBuilder, + test_output_dir: Path, + pg_version: PgVersion, + dataset: HistoricDataSet, +): + """ + This test is like test_backward_compatibility, but it looks back further to examples of our storage format from long ago. + """ + + ARTIFACT_CACHE_DIR = "./artifact_cache" + + import tarfile + from contextlib import closing + + import requests + import zstandard + + artifact_unpack_path = ARTIFACT_CACHE_DIR / Path("unpacked") / Path(dataset.name) + + # Note: we assume that when running across a matrix of PG versions, the matrix includes all the versions needed by + # HISTORIC_DATA_SETS. If we ever remove a PG version from the matrix, then historic datasets built using that version + # will no longer be covered by this test. + if pg_version != dataset.pg_version: + pytest.skip(f"Dataset {dataset} is for different PG version, skipping") + + with closing(requests.get(dataset.url, stream=True)) as r: + unzstd = zstandard.ZstdDecompressor() + with unzstd.stream_reader(r.raw) as stream: + with tarfile.open(mode="r|", fileobj=stream) as tf: + tf.extractall(artifact_unpack_path) + + neon_env_builder.enable_pageserver_remote_storage(s3_storage()) + neon_env_builder.pg_version = dataset.pg_version + env = neon_env_builder.init_configs() + env.start() + assert isinstance(env.pageserver_remote_storage, S3Storage) + + # Link artifact data into test's remote storage. We don't want the whole repo dir, just the remote storage part: we are not testing + # compat of local disk data across releases (test_backward_compat does that), we're testing really long-lived data in S3 like layer files and indices. + # + # The code generating the snapshot uses local_fs, but this test uses S3Storage, so we are copying a tree of files into a bucket. We use + # S3Storage so that the scrubber can run (the scrubber doesn't speak local_fs) + artifact_pageserver_path = ( + artifact_unpack_path / Path("repo") / Path("local_fs_remote_storage") / Path("pageserver") + ) + for root, _dirs, files in os.walk(artifact_pageserver_path): + for file in files: + local_path = os.path.join(root, file) + remote_key = ( + env.pageserver_remote_storage.prefix_in_bucket + + str(local_path)[len(str(artifact_pageserver_path)) :] + ) + log.info(f"Uploading {local_path} -> {remote_key}") + env.pageserver_remote_storage.client.upload_file( + local_path, env.pageserver_remote_storage.bucket_name, remote_key + ) + + # Check the scrubber handles this old data correctly (can read it and doesn't consider it corrupt) + # + # Do this _before_ importing to the pageserver, as that import may start writing immediately + metadata_summary = env.storage_scrubber.scan_metadata() + assert metadata_summary["tenant_count"] >= 1 + assert metadata_summary["timeline_count"] >= 1 + assert not metadata_summary["with_errors"] + assert not metadata_summary["with_warnings"] + + env.neon_cli.import_tenant(dataset.tenant_id) + + # Discover timelines + timelines = env.pageserver.http_client().timeline_list(dataset.tenant_id) + # All our artifacts should contain at least one timeline + assert len(timelines) > 0 + + # TODO: ensure that the snapshots we're importing contain a sensible variety of content, at the very + # least they should include a mixture of deltas and image layers. Preferably they should also + # contain some "exotic" stuff like aux files from logical replication. + + # Check we can start an endpoint and read the SQL that the artifact is meant to contain + reference_sql_dump = artifact_unpack_path / Path("dump.sql") + ep = env.endpoints.create_start("main", tenant_id=dataset.tenant_id) + pg_bin = PgBin(test_output_dir, env.pg_distrib_dir, env.pg_version) + pg_bin.run_capture( + ["pg_dumpall", f"--dbname={ep.connstr()}", f"--file={test_output_dir / 'dump.sql'}"] + ) + assert not dump_differs( + reference_sql_dump, + test_output_dir / "dump.sql", + test_output_dir / "dump.filediff", + ) + ep.stop() + + # Check we can also do writes to the database + existing_timeline_id = TimelineId(timelines[0]["timeline_id"]) + workload = Workload(env, dataset.tenant_id, existing_timeline_id) + workload.init() + workload.write_rows(100) + + # Check that compaction works + env.pageserver.http_client().timeline_compact( + dataset.tenant_id, existing_timeline_id, force_image_layer_creation=True + ) From 8624aabc984f504dd9b89f47984e1ab7f792e8e1 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Fri, 2 Aug 2024 19:52:04 -0400 Subject: [PATCH 36/37] fix(pageserver): deadlock in gc-compaction (#8590) We need both compaction and gc lock for gc-compaction. The lock order should be the same everywhere, otherwise there could be a deadlock where A waits for B and B waits for A. We also had a double-lock issue. The compaction lock gets acquired in the outer `compact` function. Note that the unit tests directly call `compact_with_gc`, and therefore not triggering the issue. ## Summary of changes Ensure all places acquire compact lock and then gc lock. Remove an extra compact lock acqusition. --------- Signed-off-by: Alex Chi Z --- pageserver/src/tenant/timeline/compaction.rs | 28 ++++++++++++-------- pageserver/src/tenant/timeline/delete.rs | 20 ++++++++++---- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/pageserver/src/tenant/timeline/compaction.rs b/pageserver/src/tenant/timeline/compaction.rs index 61d662d25d..421f718ad6 100644 --- a/pageserver/src/tenant/timeline/compaction.rs +++ b/pageserver/src/tenant/timeline/compaction.rs @@ -1646,19 +1646,23 @@ impl Timeline { use std::collections::BTreeSet; // Block other compaction/GC tasks from running for now. GC-compaction could run along - // with legacy compaction tasks in the future. + // with legacy compaction tasks in the future. Always ensure the lock order is compaction -> gc. + // Note that we already acquired the compaction lock when the outer `compact` function gets called. - let _compaction_lock = tokio::select! { - guard = self.compaction_lock.lock() => guard, - // TODO: refactor to CompactionError to correctly pass cancelled error - _ = cancel.cancelled() => return Err(anyhow!("cancelled")), + let gc_lock = async { + tokio::select! { + guard = self.gc_lock.lock() => Ok(guard), + // TODO: refactor to CompactionError to correctly pass cancelled error + _ = cancel.cancelled() => Err(anyhow!("cancelled")), + } }; - let _gc = tokio::select! { - guard = self.gc_lock.lock() => guard, - // TODO: refactor to CompactionError to correctly pass cancelled error - _ = cancel.cancelled() => return Err(anyhow!("cancelled")), - }; + let gc_lock = crate::timed( + gc_lock, + "acquires gc lock", + std::time::Duration::from_secs(5), + ) + .await?; info!("running enhanced gc bottom-most compaction"); @@ -2063,9 +2067,11 @@ impl Timeline { let mut guard = self.layers.write().await; guard.finish_gc_compaction(&layer_selection, &compact_to, &self.metrics) }; - self.remote_client .schedule_compaction_update(&layer_selection, &compact_to)?; + + drop(gc_lock); + Ok(()) } } diff --git a/pageserver/src/tenant/timeline/delete.rs b/pageserver/src/tenant/timeline/delete.rs index 9b2403f899..05178c38b4 100644 --- a/pageserver/src/tenant/timeline/delete.rs +++ b/pageserver/src/tenant/timeline/delete.rs @@ -63,10 +63,19 @@ pub(super) async fn delete_local_timeline_directory( tenant_shard_id: TenantShardId, timeline: &Timeline, ) -> anyhow::Result<()> { - let guards = async { tokio::join!(timeline.gc_lock.lock(), timeline.compaction_lock.lock()) }; - let guards = crate::timed( - guards, - "acquire gc and compaction locks", + // Always ensure the lock order is compaction -> gc. + let compaction_lock = timeline.compaction_lock.lock(); + let compaction_lock = crate::timed( + compaction_lock, + "acquires compaction lock", + std::time::Duration::from_secs(5), + ) + .await; + + let gc_lock = timeline.gc_lock.lock(); + let gc_lock = crate::timed( + gc_lock, + "acquires gc lock", std::time::Duration::from_secs(5), ) .await; @@ -107,7 +116,8 @@ pub(super) async fn delete_local_timeline_directory( .context("fsync_pre_mark_remove")?; info!("finished deleting layer files, releasing locks"); - drop(guards); + drop(gc_lock); + drop(compaction_lock); fail::fail_point!("timeline-delete-after-rm", |_| { Err(anyhow::anyhow!("failpoint: timeline-delete-after-rm"))? From fde8aa103ebe3189a74e8c3e4beb7a3eaba0c7e4 Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." Date: Mon, 5 Aug 2024 13:55:36 +0800 Subject: [PATCH 37/37] feat(pageserver): support auto split layers based on size (#8574) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit part of https://github.com/neondatabase/neon/issues/8002 ## Summary of changes Add a `SplitImageWriter` that automatically splits image layer based on estimated target image layer size. This does not consider compression and we might need a better metrics. --------- Signed-off-by: Alex Chi Z Co-authored-by: Arpad Müller --- pageserver/src/tenant/storage_layer.rs | 3 + .../src/tenant/storage_layer/image_layer.rs | 58 ++++- .../src/tenant/storage_layer/split_writer.rs | 244 ++++++++++++++++++ 3 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 pageserver/src/tenant/storage_layer/split_writer.rs diff --git a/pageserver/src/tenant/storage_layer.rs b/pageserver/src/tenant/storage_layer.rs index 4fd110359b..59d3e1ce09 100644 --- a/pageserver/src/tenant/storage_layer.rs +++ b/pageserver/src/tenant/storage_layer.rs @@ -8,6 +8,9 @@ mod layer_desc; mod layer_name; pub mod merge_iterator; +#[cfg(test)] +pub mod split_writer; + use crate::context::{AccessStatsBehavior, RequestContext}; use crate::repository::Value; use crate::walrecord::NeonWalRecord; diff --git a/pageserver/src/tenant/storage_layer/image_layer.rs b/pageserver/src/tenant/storage_layer/image_layer.rs index 08db27514a..aa308ba3c1 100644 --- a/pageserver/src/tenant/storage_layer/image_layer.rs +++ b/pageserver/src/tenant/storage_layer/image_layer.rs @@ -742,8 +742,14 @@ struct ImageLayerWriterInner { // where we have chosen their compressed form uncompressed_bytes_chosen: u64, + // Number of keys in the layer. + num_keys: usize, + blob_writer: BlobWriter, tree: DiskBtreeBuilder, + + #[cfg_attr(not(feature = "testing"), allow(dead_code))] + last_written_key: Key, } impl ImageLayerWriterInner { @@ -800,6 +806,8 @@ impl ImageLayerWriterInner { uncompressed_bytes: 0, uncompressed_bytes_eligible: 0, uncompressed_bytes_chosen: 0, + num_keys: 0, + last_written_key: Key::MIN, }; Ok(writer) @@ -820,6 +828,7 @@ impl ImageLayerWriterInner { let compression = self.conf.image_compression; let uncompressed_len = img.len() as u64; self.uncompressed_bytes += uncompressed_len; + self.num_keys += 1; let (_img, res) = self .blob_writer .write_blob_maybe_compressed(img, ctx, compression) @@ -839,6 +848,11 @@ impl ImageLayerWriterInner { key.write_to_byte_slice(&mut keybuf); self.tree.append(&keybuf, off)?; + #[cfg(feature = "testing")] + { + self.last_written_key = key; + } + Ok(()) } @@ -849,6 +863,7 @@ impl ImageLayerWriterInner { self, timeline: &Arc, ctx: &RequestContext, + end_key: Option, ) -> anyhow::Result { let index_start_blk = ((self.blob_writer.size() + PAGE_SZ as u64 - 1) / PAGE_SZ as u64) as u32; @@ -899,11 +914,23 @@ impl ImageLayerWriterInner { let desc = PersistentLayerDesc::new_img( self.tenant_shard_id, self.timeline_id, - self.key_range.clone(), + if let Some(end_key) = end_key { + self.key_range.start..end_key + } else { + self.key_range.clone() + }, self.lsn, metadata.len(), ); + #[cfg(feature = "testing")] + if let Some(end_key) = end_key { + assert!( + self.last_written_key < end_key, + "written key violates end_key range" + ); + } + // Note: Because we open the file in write-only mode, we cannot // reuse the same VirtualFile for reading later. That's why we don't // set inner.file here. The first read will have to re-open it. @@ -980,6 +1007,18 @@ impl ImageLayerWriter { self.inner.as_mut().unwrap().put_image(key, img, ctx).await } + #[cfg(test)] + /// Estimated size of the image layer. + pub(crate) fn estimated_size(&self) -> u64 { + let inner = self.inner.as_ref().unwrap(); + inner.blob_writer.size() + inner.tree.borrow_writer().size() + PAGE_SZ as u64 + } + + #[cfg(test)] + pub(crate) fn num_keys(&self) -> usize { + self.inner.as_ref().unwrap().num_keys + } + /// /// Finish writing the image layer. /// @@ -988,7 +1027,22 @@ impl ImageLayerWriter { timeline: &Arc, ctx: &RequestContext, ) -> anyhow::Result { - self.inner.take().unwrap().finish(timeline, ctx).await + self.inner.take().unwrap().finish(timeline, ctx, None).await + } + + #[cfg(test)] + /// Finish writing the image layer with an end key, used in [`super::split_writer::SplitImageLayerWriter`]. The end key determines the end of the image layer's covered range and is exclusive. + pub(super) async fn finish_with_end_key( + mut self, + timeline: &Arc, + end_key: Key, + ctx: &RequestContext, + ) -> anyhow::Result { + self.inner + .take() + .unwrap() + .finish(timeline, ctx, Some(end_key)) + .await } } diff --git a/pageserver/src/tenant/storage_layer/split_writer.rs b/pageserver/src/tenant/storage_layer/split_writer.rs new file mode 100644 index 0000000000..a4091a890c --- /dev/null +++ b/pageserver/src/tenant/storage_layer/split_writer.rs @@ -0,0 +1,244 @@ +use std::sync::Arc; + +use bytes::Bytes; +use pageserver_api::key::{Key, KEY_SIZE}; +use utils::{id::TimelineId, lsn::Lsn, shard::TenantShardId}; + +use crate::{config::PageServerConf, context::RequestContext, tenant::Timeline}; + +use super::{ImageLayerWriter, ResidentLayer}; + +/// An image writer that takes images and produces multiple image layers. The interface does not +/// guarantee atomicity (i.e., if the image layer generation fails, there might be leftover files +/// to be cleaned up) +#[must_use] +pub struct SplitImageLayerWriter { + inner: ImageLayerWriter, + target_layer_size: u64, + generated_layers: Vec, + conf: &'static PageServerConf, + timeline_id: TimelineId, + tenant_shard_id: TenantShardId, + lsn: Lsn, +} + +impl SplitImageLayerWriter { + pub async fn new( + conf: &'static PageServerConf, + timeline_id: TimelineId, + tenant_shard_id: TenantShardId, + start_key: Key, + lsn: Lsn, + target_layer_size: u64, + ctx: &RequestContext, + ) -> anyhow::Result { + Ok(Self { + target_layer_size, + inner: ImageLayerWriter::new( + conf, + timeline_id, + tenant_shard_id, + &(start_key..Key::MAX), + lsn, + ctx, + ) + .await?, + generated_layers: Vec::new(), + conf, + timeline_id, + tenant_shard_id, + lsn, + }) + } + + pub async fn put_image( + &mut self, + key: Key, + img: Bytes, + tline: &Arc, + ctx: &RequestContext, + ) -> anyhow::Result<()> { + // The current estimation is an upper bound of the space that the key/image could take + // because we did not consider compression in this estimation. The resulting image layer + // could be smaller than the target size. + let addition_size_estimation = KEY_SIZE as u64 + img.len() as u64; + if self.inner.num_keys() >= 1 + && self.inner.estimated_size() + addition_size_estimation >= self.target_layer_size + { + let next_image_writer = ImageLayerWriter::new( + self.conf, + self.timeline_id, + self.tenant_shard_id, + &(key..Key::MAX), + self.lsn, + ctx, + ) + .await?; + let prev_image_writer = std::mem::replace(&mut self.inner, next_image_writer); + self.generated_layers.push( + prev_image_writer + .finish_with_end_key(tline, key, ctx) + .await?, + ); + } + self.inner.put_image(key, img, ctx).await + } + + pub(crate) async fn finish( + self, + tline: &Arc, + ctx: &RequestContext, + end_key: Key, + ) -> anyhow::Result> { + let Self { + mut generated_layers, + inner, + .. + } = self; + generated_layers.push(inner.finish_with_end_key(tline, end_key, ctx).await?); + Ok(generated_layers) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + tenant::{ + harness::{TenantHarness, TIMELINE_ID}, + storage_layer::AsLayerDesc, + }, + DEFAULT_PG_VERSION, + }; + + use super::*; + + fn get_key(id: u32) -> Key { + let mut key = Key::from_hex("000000000033333333444444445500000000").unwrap(); + key.field6 = id; + key + } + + fn get_img(id: u32) -> Bytes { + format!("{id:064}").into() + } + + fn get_large_img() -> Bytes { + vec![0; 8192].into() + } + + #[tokio::test] + async fn write_one_image() { + let harness = TenantHarness::create("split_writer_write_one_image") + .await + .unwrap(); + let (tenant, ctx) = harness.load().await; + + let tline = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + + let mut writer = SplitImageLayerWriter::new( + tenant.conf, + tline.timeline_id, + tenant.tenant_shard_id, + get_key(0), + Lsn(0x18), + 4 * 1024 * 1024, + &ctx, + ) + .await + .unwrap(); + + writer + .put_image(get_key(0), get_img(0), &tline, &ctx) + .await + .unwrap(); + let layers = writer.finish(&tline, &ctx, get_key(10)).await.unwrap(); + assert_eq!(layers.len(), 1); + } + + #[tokio::test] + async fn write_split() { + let harness = TenantHarness::create("split_writer_write_split") + .await + .unwrap(); + let (tenant, ctx) = harness.load().await; + + let tline = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + + let mut writer = SplitImageLayerWriter::new( + tenant.conf, + tline.timeline_id, + tenant.tenant_shard_id, + get_key(0), + Lsn(0x18), + 4 * 1024 * 1024, + &ctx, + ) + .await + .unwrap(); + const N: usize = 2000; + for i in 0..N { + let i = i as u32; + writer + .put_image(get_key(i), get_large_img(), &tline, &ctx) + .await + .unwrap(); + } + let layers = writer + .finish(&tline, &ctx, get_key(N as u32)) + .await + .unwrap(); + assert_eq!(layers.len(), N / 512 + 1); + for idx in 0..layers.len() { + assert_ne!(layers[idx].layer_desc().key_range.start, Key::MIN); + assert_ne!(layers[idx].layer_desc().key_range.end, Key::MAX); + if idx > 0 { + assert_eq!( + layers[idx - 1].layer_desc().key_range.end, + layers[idx].layer_desc().key_range.start + ); + } + } + } + + #[tokio::test] + async fn write_large_img() { + let harness = TenantHarness::create("split_writer_write_large_img") + .await + .unwrap(); + let (tenant, ctx) = harness.load().await; + + let tline = tenant + .create_test_timeline(TIMELINE_ID, Lsn(0x10), DEFAULT_PG_VERSION, &ctx) + .await + .unwrap(); + + let mut writer = SplitImageLayerWriter::new( + tenant.conf, + tline.timeline_id, + tenant.tenant_shard_id, + get_key(0), + Lsn(0x18), + 4 * 1024, + &ctx, + ) + .await + .unwrap(); + + writer + .put_image(get_key(0), get_img(0), &tline, &ctx) + .await + .unwrap(); + writer + .put_image(get_key(1), get_large_img(), &tline, &ctx) + .await + .unwrap(); + let layers = writer.finish(&tline, &ctx, get_key(10)).await.unwrap(); + assert_eq!(layers.len(), 2); + } +}