mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-08 05:52:55 +00:00
## Problem Measuring cardinality using logs is expensive and slow. ## Summary of changes Implement a pre-aggregated HyperLogLog-based cardinality estimate. HyperLogLog estimates the cardinality of a set using the fact that the probability that the uniform hash of a value ends in a run of `n` 0s is `1/2^n`; therefore, having observed a run of `n` 0s suggests we have measured `2^n` distinct values. By using multiple shards, we can use the harmonic mean to get a more accurate estimate. We record this into a Prometheus time-series. HyperLogLog counts can be merged by taking the `max` of each shard. We can apply a `max_over_time` in order to find the estimate of the cardinality of distinct values over time.
19 lines
326 B
TOML
19 lines
326 B
TOML
# Manifest for the `metrics` crate.
[package]
name = "metrics"
version = "0.1.0"
# Inherited from the workspace root's `[workspace.package]` table.
edition.workspace = true
license.workspace = true

# Runtime dependencies. Each specification is inherited from the workspace
# root's `[workspace.dependencies]` table.
[dependencies]
prometheus.workspace = true
libc.workspace = true
once_cell.workspace = true
chrono.workspace = true
# NOTE(review): presumably the hash function backing the HyperLogLog
# cardinality estimator — confirm against the crate sources.
twox-hash.workspace = true

# NOTE(review): kept apart from the other dependencies; presumably a
# generated feature-unification crate — confirm before reordering.
workspace_hack.workspace = true

# Dependencies compiled only for tests, examples, and benchmarks.
# Versions are pinned here rather than inherited from the workspace.
[dev-dependencies]
rand = "0.8"
rand_distr = "0.4.3"