mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
storcon: fix eliding parameters from proxied URL labels (#10817)
## Problem We had code for stripping IDs out of proxied paths to reduce cardinality of metrics, but it was only stripping out tenant IDs, and leaving in timeline IDs and query parameters (e.g. LSN in lsn->timestamp lookups). ## Summary of changes - Use a more general regex approach. There is still some risk that a future pageserver API might include a parameter in `/the/path/`, but we control that API and it is not often extended. We will also alert on metrics cardinality in staging so that if we made that mistake we would notice.
This commit is contained in:
@@ -34,6 +34,7 @@ reqwest = { workspace = true, features = ["stream"] }
|
||||
routerify.workspace = true
|
||||
safekeeper_api.workspace = true
|
||||
safekeeper_client.workspace = true
|
||||
regex.workspace = true
|
||||
rustls-native-certs.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
|
||||
@@ -516,6 +516,17 @@ async fn handle_tenant_timeline_block_unblock_gc(
|
||||
json_response(StatusCode::OK, ())
|
||||
}
|
||||
|
||||
// For metric labels where we would like to include the approximate path, but exclude high-cardinality fields like query parameters
|
||||
// and tenant/timeline IDs. Since we are proxying to arbitrary paths, we don't have routing templates to
|
||||
// compare to, so we can just filter out our well known ID format with regexes.
|
||||
fn path_without_ids(path: &str) -> String {
|
||||
static ID_REGEX: std::sync::OnceLock<regex::Regex> = std::sync::OnceLock::new();
|
||||
ID_REGEX
|
||||
.get_or_init(|| regex::Regex::new(r"([0-9a-fA-F]{32}(-[0-9]{4})?|\?.*)").unwrap())
|
||||
.replace_all(path, "")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
async fn handle_tenant_timeline_passthrough(
|
||||
service: Arc<Service>,
|
||||
req: Request<Body>,
|
||||
@@ -551,10 +562,7 @@ async fn handle_tenant_timeline_passthrough(
|
||||
.metrics_group
|
||||
.storage_controller_passthrough_request_latency;
|
||||
|
||||
// This is a bit awkward. We remove the param from the request
|
||||
// and join the words by '_' to get a label for the request.
|
||||
let just_path = path.replace(&tenant_shard_str, "");
|
||||
let path_label = just_path
|
||||
let path_label = path_without_ids(&path)
|
||||
.split('/')
|
||||
.filter(|token| !token.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
@@ -2089,3 +2097,16 @@ pub fn make_router(
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
|
||||
use super::path_without_ids;
|
||||
|
||||
#[test]
|
||||
fn test_path_without_ids() {
|
||||
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
|
||||
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788"), "/v1/tenant//timeline/");
|
||||
assert_eq!(path_without_ids("/v1/tenant/1a2b3344556677881122334455667788-0108/timeline/AA223344556677881122334455667788?parameter=foo"), "/v1/tenant//timeline/");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user