storcon: fix eliding parameters from proxied URL labels (#10817)

## Problem

We had code for stripping IDs out of proxied paths to reduce cardinality
of metrics, but it was only stripping out tenant IDs, and leaving in
timeline IDs and query parameters (e.g. LSN in lsn->timestamp lookups).

## Summary of changes

- Use a more general regex approach.

There is still some risk that a future pageserver API might include a
parameter in `/the/path/`, but we control that API and it is not often
extended. We will also alert on metrics cardinality in staging so that
if we made that mistake we would notice.
This commit is contained in:
John Spray
2025-02-14 09:57:19 +00:00
committed by GitHub
parent 8bdb1828c8
commit 996f0a3753
3 changed files with 27 additions and 4 deletions

View File

@@ -34,6 +34,7 @@ reqwest = { workspace = true, features = ["stream"] }
routerify.workspace = true
safekeeper_api.workspace = true
safekeeper_client.workspace = true
regex.workspace = true
rustls-native-certs.workspace = true
serde.workspace = true
serde_json.workspace = true

View File

@@ -516,6 +516,17 @@ async fn handle_tenant_timeline_block_unblock_gc(
json_response(StatusCode::OK, ())
}
// For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
// and tenant/timeline IDs. Since we are proxying to arbitrary paths, we don't have routing templates to
// compare to, so we can just filter out our well known ID format with regexes.
fn path_without_ids(path: &str) -> String {
static ID_REGEX: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
ID_REGEX
.get_or_init(|| regex::Regex::new(r"([0-9a-fA-F]{32}(-[0-9]{4})?|\?.*)").unwrap())
.replace_all(path, "")
.to_string()
}
async fn handle_tenant_timeline_passthrough(
service: Arc<Service>,
req: Request<Body>,
@@ -551,10 +562,7 @@ async fn handle_tenant_timeline_passthrough(
.metrics_group
.storage_controller_passthrough_request_latency;
// This is a bit awkward. We remove the param from the request
// and join the words by '_' to get a label for the request.
let just_path = path.replace(&tenant_shard_str, "");
let path_label = just_path
let path_label = path_without_ids(&path)
.split('/')
.filter(|token| !token.is_empty())
.collect::<Vec<_>>()
@@ -2089,3 +2097,16 @@ pub fn make_router(
)
})
}
#[cfg(test)]
mod test {
use super::path_without_ids;
#[test]
fn test_path_without_ids() {
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo"), "/v1/tenant//timeline/");
}
}