mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-17 02:12:56 +00:00
Part of #8128. ## Problem The scrubber uses the `scan_metadata` command to flag metadata inconsistencies. To trust it at scale, we need to make sure the errors we emit reflect real scenarios. One check performed in the scrubber is to see whether the layers listed in the latest `index_part.json` are present in the object listing. Currently, the scrubber does not robustly handle the case where objects are uploaded/deleted during the scan. ## Summary of changes **Condition for success:** An object in the index is (1) in the object listing we acquire from S3 or (2) found in a HeadObject request (new object). - Add `HeadObject` requests for the layers missing from the object listing. - Keep the order of first getting the object listing and then downloading the layers. - Update the check to only consider shards with the highest shard count. - Skip analyzing a timeline if the `deleted_at` tombstone is set in `index_part.json`. - Add a new test to verify that the scrubber actually detects metadata inconsistencies. _Misc_ - A timeline with no ancestor should always have some layers. - Removed experimental histograms. _Caveat_ - Ancestor layers are not cleaned up until #8308 is implemented. If ancestor layers reference non-existing layers in the index, the scrubber will emit false positives. Signed-off-by: Yuchen Liang <yuchen@neon.tech>
54 lines
1.6 KiB
TOML
54 lines
1.6 KiB
TOML
# Package metadata for the `storage_scrubber` crate.
# `edition` and `license` are not set here; they are inherited from the
# workspace root manifest.
[package]
name = "storage_scrubber"
version = "0.1.0"
edition.workspace = true
license.workspace = true

[dependencies]
# Dependencies inherited unchanged from the workspace root manifest.
aws-sdk-s3.workspace = true
aws-smithy-async.workspace = true
either.workspace = true
tokio-rustls.workspace = true
anyhow.workspace = true
hex.workspace = true
humantime.workspace = true
thiserror.workspace = true
rand.workspace = true
bytes.workspace = true
bincode.workspace = true
crc32c.workspace = true
serde.workspace = true
serde_json.workspace = true
serde_with.workspace = true
workspace_hack.workspace = true
utils.workspace = true
async-stream.workspace = true
tokio-postgres-rustls.workspace = true
postgres_ffi.workspace = true
tokio-stream.workspace = true
tokio-postgres.workspace = true
tokio-util.workspace = true
futures-util.workspace = true
itertools.workspace = true
camino.workspace = true
rustls.workspace = true
rustls-native-certs.workspace = true
once_cell.workspace = true
storage_controller_client.workspace = true

# Workspace dependencies with a crate-specific feature selection.
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
chrono = { workspace = true, default-features = false, features = ["clock", "serde"] }
reqwest = { workspace = true, default-features = false, features = ["rustls-tls", "json"] }
aws-config = { workspace = true, default-features = false, features = ["rustls", "sso"] }

# Sibling crates referenced by relative path.
pageserver = { path = "../pageserver" }
pageserver_api = { path = "../libs/pageserver_api" }
remote_storage = { path = "../libs/remote_storage" }

tracing.workspace = true
tracing-subscriber.workspace = true
clap.workspace = true
# Pinned locally: the only dependency in this table that specifies its own
# version requirement instead of inheriting it from the workspace.
tracing-appender = "0.2"

futures.workspace = true