mirror of
https://github.com/neondatabase/neon.git
synced 2026-03-28 04:30:39 +00:00
Compare commits
3 Commits
conrad/pro
...
exponentia
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a4c1b06eb | ||
|
|
a59720b5f5 | ||
|
|
65c519f36c |
@@ -23,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
|
|||||||
SHLIB_LINK = -lcurl
|
SHLIB_LINK = -lcurl
|
||||||
|
|
||||||
EXTENSION = neon
|
EXTENSION = neon
|
||||||
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql
|
DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
|
||||||
PGFILEDESC = "neon - cloud storage for PostgreSQL"
|
PGFILEDESC = "neon - cloud storage for PostgreSQL"
|
||||||
|
|
||||||
EXTRA_CLEAN = \
|
EXTRA_CLEAN = \
|
||||||
|
|||||||
@@ -1263,7 +1263,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
|
|||||||
int32 dc;
|
int32 dc;
|
||||||
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
||||||
LWLockAcquire(lfc_lock, LW_SHARED);
|
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
|
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, 1.0);
|
||||||
LWLockRelease(lfc_lock);
|
LWLockRelease(lfc_lock);
|
||||||
PG_RETURN_INT32(dc);
|
PG_RETURN_INT32(dc);
|
||||||
}
|
}
|
||||||
@@ -1280,7 +1280,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
|
|||||||
int32 dc;
|
int32 dc;
|
||||||
bool reset = PG_GETARG_BOOL(0);
|
bool reset = PG_GETARG_BOOL(0);
|
||||||
LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
|
LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
|
||||||
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
|
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1, 1.0);
|
||||||
if (reset)
|
if (reset)
|
||||||
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
|
memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
|
||||||
LWLockRelease(lfc_lock);
|
LWLockRelease(lfc_lock);
|
||||||
@@ -1288,3 +1288,21 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
|
|||||||
}
|
}
|
||||||
PG_RETURN_NULL();
|
PG_RETURN_NULL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PG_FUNCTION_INFO_V1(approximate_optimal_cache_size);
|
||||||
|
|
||||||
|
Datum
|
||||||
|
approximate_optimal_cache_size(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
if (lfc_size_limit != 0)
|
||||||
|
{
|
||||||
|
int32 dc;
|
||||||
|
time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
|
||||||
|
double min_hit_ratio = PG_ARGISNULL(1) ? 1.0 : PG_GETARG_FLOAT8(1);
|
||||||
|
LWLockAcquire(lfc_lock, LW_SHARED);
|
||||||
|
dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, min_hit_ratio);
|
||||||
|
LWLockRelease(lfc_lock);
|
||||||
|
PG_RETURN_INT32(dc);
|
||||||
|
}
|
||||||
|
PG_RETURN_NULL();
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
* Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
|
* Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
|
||||||
*
|
*
|
||||||
* Implements https://hal.science/hal-00465313/document
|
* Implements https://hal.science/hal-00465313/document
|
||||||
*
|
*
|
||||||
* Based on Hideaki Ohno's C++ implementation. This is probably not ideally
|
* Based on Hideaki Ohno's C++ implementation. This is probably not ideally
|
||||||
* suited to estimating the cardinality of very large sets; in particular, we
|
* suited to estimating the cardinality of very large sets; in particular, we
|
||||||
* have not attempted to further optimize the implementation as described in
|
* have not attempted to further optimize the implementation as described in
|
||||||
@@ -126,22 +126,69 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
|
|||||||
/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
|
/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
|
||||||
count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);
|
count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);
|
||||||
|
|
||||||
cState->regs[index][count] = now;
|
if (cState->regs[index][count].ts)
|
||||||
|
{
|
||||||
|
/* update histgoram */
|
||||||
|
int64_t delta = (now - cState->regs[index][count].ts)/USECS_PER_SEC;
|
||||||
|
uint32_t new_histogram[HIST_SIZE] = {0};
|
||||||
|
for (int i = 0; i < HIST_SIZE; i++) {
|
||||||
|
/* Use middle point of interval */
|
||||||
|
uint32 interval_log2 = pg_ceil_log2_32((delta + (HIST_MIN_INTERVAL*((1<<i) + ((1<<i)/2))/2)) / HIST_MIN_INTERVAL);
|
||||||
|
uint32 cell = Min(interval_log2, HIST_SIZE-1);
|
||||||
|
new_histogram[cell] += cState->regs[index][count].histogram[i];
|
||||||
|
}
|
||||||
|
memcpy(cState->regs[index][count].histogram, new_histogram, sizeof new_histogram);
|
||||||
|
}
|
||||||
|
cState->regs[index][count].ts = now;
|
||||||
|
cState->regs[index][count].histogram[0] += 1; // most recent access always goes to first histogram backet
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
getAccessCount(const HyperLogLogRegister* reg, time_t duration)
|
||||||
|
{
|
||||||
|
uint32_t count = 0;
|
||||||
|
/* Simplest solution is to take in account all points fro overlapped interval */
|
||||||
|
for (size_t i = 0; i < HIST_SIZE && HIST_MIN_INTERVAL*((1 << i)/2) <= duration; i++) {
|
||||||
|
count += reg->histogram[i];
|
||||||
|
}
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint8
|
static uint8
|
||||||
getMaximum(const TimestampTz* reg, TimestampTz since)
|
getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, double min_hit_ratio)
|
||||||
{
|
{
|
||||||
uint8 max = 0;
|
uint8 max = 0;
|
||||||
|
size_t i, j;
|
||||||
for (size_t i = 0; i < HLL_C_BITS + 1; i++)
|
if (min_hit_ratio == 1.0)
|
||||||
{
|
{
|
||||||
if (reg[i] >= since)
|
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||||
{
|
{
|
||||||
max = i;
|
if (reg[i].ts >= since)
|
||||||
|
{
|
||||||
|
max = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
uint32_t total_count = 0;
|
||||||
|
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||||
|
{
|
||||||
|
total_count += getAccessCount(®[i], duration);
|
||||||
|
}
|
||||||
|
if (total_count != 0)
|
||||||
|
{
|
||||||
|
const double threshold = total_count * (1 - min_hit_ratio);
|
||||||
|
for (i = 0; i < HLL_C_BITS + 1; i++)
|
||||||
|
{
|
||||||
|
// Take in account only bits with access frequncy exceeding maximal miss rate (1 - hit rate)
|
||||||
|
if (reg[i].ts >= since && getAccessCount(®[i], duration) >= threshold)
|
||||||
|
{
|
||||||
|
max = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -150,7 +197,7 @@ getMaximum(const TimestampTz* reg, TimestampTz since)
|
|||||||
* Estimates cardinality, based on elements added so far
|
* Estimates cardinality, based on elements added so far
|
||||||
*/
|
*/
|
||||||
double
|
double
|
||||||
estimateSHLL(HyperLogLogState *cState, time_t duration)
|
estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
|
||||||
{
|
{
|
||||||
double result;
|
double result;
|
||||||
double sum = 0.0;
|
double sum = 0.0;
|
||||||
@@ -161,7 +208,7 @@ estimateSHLL(HyperLogLogState *cState, time_t duration)
|
|||||||
|
|
||||||
for (i = 0; i < HLL_N_REGISTERS; i++)
|
for (i = 0; i < HLL_N_REGISTERS; i++)
|
||||||
{
|
{
|
||||||
R[i] = getMaximum(cState->regs[i], since);
|
R[i] = getMaximum(cState->regs[i], since, duration, min_hit_ratio);
|
||||||
sum += 1.0 / pow(2.0, R[i]);
|
sum += 1.0 / pow(2.0, R[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -53,6 +53,14 @@
|
|||||||
#define HLL_C_BITS (32 - HLL_BIT_WIDTH)
|
#define HLL_C_BITS (32 - HLL_BIT_WIDTH)
|
||||||
#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)
|
#define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Number of histogram cells. We use exponential histogram with first interval
|
||||||
|
* equals to one minutes. Autoscaler request LFC statistic with intervals 1,2,...,60 minutes
|
||||||
|
* so 2^8=64 seems to be enough for our needs.
|
||||||
|
*/
|
||||||
|
#define HIST_SIZE 8
|
||||||
|
#define HIST_MIN_INTERVAL 60 /* seconds */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* HyperLogLog is an approximate technique for computing the number of distinct
|
* HyperLogLog is an approximate technique for computing the number of distinct
|
||||||
* entries in a set. Importantly, it does this by using a fixed amount of
|
* entries in a set. Importantly, it does this by using a fixed amount of
|
||||||
@@ -69,18 +77,21 @@
|
|||||||
* modified timestamp >= the query timestamp. This value is the number of bits
|
* modified timestamp >= the query timestamp. This value is the number of bits
|
||||||
* for this register in the normal HLL calculation.
|
* for this register in the normal HLL calculation.
|
||||||
*
|
*
|
||||||
* The memory usage is 2^B * (C + 1) * sizeof(TimetampTz), or 184kiB.
|
* The memory usage is 2^B * (C + 1) * sizeof(HyperLogLogRegister), or 920kiB.
|
||||||
* Usage could be halved if we decide to reduce the required time dimension
|
|
||||||
* precision; as 32 bits in second precision should be enough for statistics.
|
|
||||||
* However, that is not yet implemented.
|
|
||||||
*/
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
TimestampTz ts; /* last access timestamp */
|
||||||
|
uint32_t histogram[HIST_SIZE]; /* access counter exponential histogram */
|
||||||
|
} HyperLogLogRegister;
|
||||||
|
|
||||||
typedef struct HyperLogLogState
|
typedef struct HyperLogLogState
|
||||||
{
|
{
|
||||||
TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
|
HyperLogLogRegister regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
|
||||||
} HyperLogLogState;
|
} HyperLogLogState;
|
||||||
|
|
||||||
extern void initSHLL(HyperLogLogState *cState);
|
extern void initSHLL(HyperLogLogState *cState);
|
||||||
extern void addSHLL(HyperLogLogState *cState, uint32 hash);
|
extern void addSHLL(HyperLogLogState *cState, uint32 hash);
|
||||||
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration);
|
extern double estimateSHLL(HyperLogLogState *cState, time_t dutration, double min_hit_ratio);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
10
pgxn/neon/neon--1.4--1.5.sql
Normal file
10
pgxn/neon/neon--1.4--1.5.sql
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
|
||||||
|
|
||||||
|
-- returns minimal LFC cache size (in 8kb pages) provided specified hit rate
|
||||||
|
CREATE FUNCTION approximate_optimal_cache_size(duration_sec integer default null, min_hit_ration float8 default null)
|
||||||
|
RETURNS integer
|
||||||
|
AS 'MODULE_PATHNAME', 'approximate_optimal_cache_size'
|
||||||
|
LANGUAGE C PARALLEL SAFE;
|
||||||
|
|
||||||
|
GRANT EXECUTE ON FUNCTION approximate_optimal_cache_size(integer,float8) TO pg_monitor;
|
||||||
|
|
||||||
1
pgxn/neon/neon--1.5--1.4.sql
Normal file
1
pgxn/neon/neon--1.5--1.4.sql
Normal file
@@ -0,0 +1 @@
|
|||||||
|
DROP FUNCTION IF EXISTS approximate_optimal_cache_size(integer,float8) CASCADE;
|
||||||
@@ -114,3 +114,46 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):
|
|||||||
|
|
||||||
assert estimation_1k >= 20 and estimation_1k <= 40
|
assert estimation_1k >= 20 and estimation_1k <= 40
|
||||||
assert estimation_10k >= 200 and estimation_10k <= 400
|
assert estimation_10k >= 200 and estimation_10k <= 400
|
||||||
|
|
||||||
|
|
||||||
|
def test_optimal_cache_size_approximation(neon_simple_env: NeonEnv):
|
||||||
|
env = neon_simple_env
|
||||||
|
|
||||||
|
endpoint = env.endpoints.create_start(
|
||||||
|
branch_name="main",
|
||||||
|
config_lines=[
|
||||||
|
"autovacuum = off",
|
||||||
|
"shared_buffers=1MB",
|
||||||
|
"neon.max_file_cache_size=256MB",
|
||||||
|
"neon.file_cache_size_limit=245MB",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
conn = endpoint.connect()
|
||||||
|
cur = conn.cursor()
|
||||||
|
cur.execute("create extension neon version '1.5'")
|
||||||
|
cur.execute(
|
||||||
|
"create table t_huge(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
|
||||||
|
)
|
||||||
|
cur.execute(
|
||||||
|
"create table t_small(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
|
||||||
|
)
|
||||||
|
cur.execute(
|
||||||
|
"insert into t_huge(pk) values (generate_series(1,1000000))"
|
||||||
|
) # table size is 21277 pages
|
||||||
|
cur.execute(
|
||||||
|
"insert into t_small(pk) values (generate_series(1,100000))"
|
||||||
|
) # table size is 2128 pages
|
||||||
|
time.sleep(2)
|
||||||
|
before = time.monotonic()
|
||||||
|
for _ in range(100):
|
||||||
|
cur.execute("select sum(count) from t_small")
|
||||||
|
cur.execute("select sum(count) from t_huge")
|
||||||
|
after = time.monotonic()
|
||||||
|
cur.execute(f"select approximate_working_set_size_seconds({int(after - before + 1)})")
|
||||||
|
ws_estimation = cur.fetchall()[0][0]
|
||||||
|
log.info(f"Working set size estimaton {ws_estimation}")
|
||||||
|
cur.execute(f"select approximate_optimal_cache_size({int(after - before + 1)}, 0.99)")
|
||||||
|
optimal_cache_size = cur.fetchall()[0][0]
|
||||||
|
log.info(f"Optimal cache size for 99% hit rate {optimal_cache_size}")
|
||||||
|
assert ws_estimation >= 20000 and ws_estimation <= 30000
|
||||||
|
assert optimal_cache_size >= 2000 and optimal_cache_size <= 3000
|
||||||
|
|||||||
Reference in New Issue
Block a user