mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-17 10:22:56 +00:00
Compare commits
8 Commits
release-pr
...
access_sta
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
83294d771b | ||
|
|
b3ef6c7bf5 | ||
|
|
f88ff9f3c6 | ||
|
|
593c4244fd | ||
|
|
d7aa36c4c0 | ||
|
|
df127ef209 | ||
|
|
72a73d2c82 | ||
|
|
172239c7ee |
@@ -8,6 +8,7 @@ OBJS = \
|
||||
libpagestore.o \
|
||||
libpqwalproposer.o \
|
||||
neon.o \
|
||||
access_stat.o \
|
||||
pagestore_smgr.o \
|
||||
relsize_cache.o \
|
||||
walproposer.o \
|
||||
|
||||
274
pgxn/neon/access_stat.c
Normal file
274
pgxn/neon/access_stat.c
Normal file
@@ -0,0 +1,274 @@
|
||||
|
||||
/*
 * We want this statistic to represent the current access pattern; this is why,
 * once (n_seq_accesses + n_rnd_accesses) reaches neon.max_access_stat_count,
 * we divide both counters by two, decreasing the weight of historical data.
 */
|
||||
#include "postgres.h"
|
||||
#include "funcapi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "common/hashfn.h"
|
||||
#include "pagestore_client.h"
|
||||
#include "storage/relfilenode.h"
|
||||
#include "utils/guc.h"
|
||||
|
||||
/* Structure used to predict sequential access */
|
||||
|
||||
typedef struct AccessStatEntry {
|
||||
RelFileNode relnode;
|
||||
BlockNumber blkno; /* last accessed black number */
|
||||
uint32 n_seq_accesses; /* number of sequential accesses (when block N+1 is accessed after block N) */
|
||||
uint32 n_rnd_accesses; /* number of random accesses */
|
||||
uint32 hash;
|
||||
uint32 status;
|
||||
uint64 access_count; /* total number of relation accesses since backend start */
|
||||
dlist_node lru_node; /* LRU list node */
|
||||
} AccessStatEntry;
|
||||
|
||||
#define SH_PREFIX as
|
||||
#define SH_ELEMENT_TYPE AccessStatEntry
|
||||
#define SH_KEY_TYPE RelFileNode
|
||||
#define SH_KEY relnode
|
||||
#define SH_STORE_HASH
|
||||
#define SH_GET_HASH(tb, a) ((a)->hash)
|
||||
#define SH_HASH_KEY(tb, key) hash_bytes( \
|
||||
((const unsigned char *) &(key)), \
|
||||
sizeof(RelFileNode) \
|
||||
)
|
||||
|
||||
#define SH_EQUAL(tb, a, b) RelFileNodeEquals((a), (b))
|
||||
#define SH_SCOPE static inline
|
||||
#define SH_DEFINE
|
||||
#define SH_DECLARE
|
||||
#include "lib/simplehash.h"
|
||||
|
||||
static as_hash *hash;
|
||||
static dlist_head lru;
|
||||
static int max_access_stat_size;
|
||||
static int max_access_stat_count;
|
||||
static double min_seq_access_ratio;
|
||||
static int min_seq_access_count;
|
||||
|
||||
void access_stat_init(void)
|
||||
{
|
||||
MemoryContext memctx = AllocSetContextCreate(TopMemoryContext,
|
||||
"NeonSMGR/access_stat",
|
||||
ALLOCSET_DEFAULT_SIZES);
|
||||
DefineCustomIntVariable("neon.max_access_stat_size",
|
||||
"Maximal size of Neon relation access statistic hash",
|
||||
NULL,
|
||||
&max_access_stat_size,
|
||||
1024,
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
DefineCustomIntVariable("neon.max_access_stat_count",
|
||||
"Maximal value of relation access counter after which counters are divided by 2",
|
||||
NULL,
|
||||
&max_access_stat_count,
|
||||
1024,
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
DefineCustomRealVariable("neon.min_seq_access_ratio",
|
||||
"Minimal seq/(rnd+seq) ratio to determine sequential access",
|
||||
NULL,
|
||||
&min_seq_access_ratio,
|
||||
0.9,
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
DefineCustomIntVariable("neon.min_seq_access_count",
|
||||
"Minimal access count to determine sequetial access",
|
||||
NULL,
|
||||
&min_seq_access_count,
|
||||
10,
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
hash = as_create(memctx, max_access_stat_size, NULL);
|
||||
dlist_init(&lru);
|
||||
}
|
||||
|
||||
|
||||
bool is_sequential_access(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno)
|
||||
{
|
||||
bool is_seq_access = false;
|
||||
if (forkNum == MAIN_FORKNUM /* prefetch makes sense only for main fork */
|
||||
&& max_access_stat_size != 0)
|
||||
{
|
||||
AccessStatEntry* entry = as_lookup(hash, rnode);
|
||||
if (entry == NULL)
|
||||
{
|
||||
bool found;
|
||||
/* New item */
|
||||
while (hash->members >= max_access_stat_size)
|
||||
{
|
||||
/* Hash overflow: find candidate for replacement */
|
||||
AccessStatEntry* victim = dlist_container(AccessStatEntry, lru_node, dlist_pop_head_node(&lru));
|
||||
as_delete_item(hash, victim);
|
||||
}
|
||||
entry = as_insert(hash, rnode, &found);
|
||||
Assert(!found);
|
||||
/* Set both counter to zero because we don't know whethr first access is sequential or random */
|
||||
entry->n_seq_accesses = 0;
|
||||
entry->n_rnd_accesses = 0;
|
||||
entry->access_count = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32 access_count = entry->n_seq_accesses + entry->n_rnd_accesses;
|
||||
/*
|
||||
* We want this function to represent most recent access pattern,
|
||||
* so when number of accesses exceed threashold value `max_access_stat_count`
|
||||
* we divide bother coutners by two devaluing old data
|
||||
*/
|
||||
if (access_count >= max_access_stat_count)
|
||||
{
|
||||
entry->n_seq_accesses >>= 1;
|
||||
entry->n_rnd_accesses >>= 1;
|
||||
}
|
||||
if (entry->blkno+1 == blkno)
|
||||
entry->n_seq_accesses += 1;
|
||||
else
|
||||
entry->n_rnd_accesses += 1;
|
||||
entry->access_count += 1;
|
||||
access_count = entry->n_seq_accesses + entry->n_rnd_accesses;
|
||||
|
||||
is_seq_access = access_count >= min_seq_access_count
|
||||
&& (double)entry->n_seq_accesses / access_count >= min_seq_access_ratio;
|
||||
|
||||
|
||||
/* Remove entry from LRU list tobe able to insert it to the end of this list */
|
||||
dlist_delete(&entry->lru_node);
|
||||
}
|
||||
/* Place entry to the tail of LRU list */
|
||||
dlist_push_tail(&lru, &entry->lru_node);
|
||||
entry->blkno = blkno;
|
||||
}
|
||||
return is_seq_access;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get relation access pattern
|
||||
*/
|
||||
PG_FUNCTION_INFO_V1(get_relation_access_statistics);
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TupleDesc tupdesc;
|
||||
dlist_node* curr;
|
||||
} AccessStatContext;
|
||||
|
||||
#define NUM_ACCESS_STAT_COLUMNS 6
|
||||
|
||||
Datum
|
||||
get_relation_access_statistics(PG_FUNCTION_ARGS)
|
||||
{
|
||||
FuncCallContext *funcctx;
|
||||
Datum result;
|
||||
MemoryContext oldcontext;
|
||||
AccessStatContext *fctx; /* User function context. */
|
||||
TupleDesc tupledesc;
|
||||
TupleDesc expected_tupledesc;
|
||||
HeapTuple tuple;
|
||||
|
||||
if (SRF_IS_FIRSTCALL())
|
||||
{
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
|
||||
/* Switch context when allocating stuff to be used in later calls */
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
/* Create a user function context for cross-call persistence */
|
||||
fctx = (AccessStatContext *) palloc(sizeof(AccessStatContext));
|
||||
|
||||
/*
|
||||
* To smoothly support upgrades from version 1.0 of this extension
|
||||
* transparently handle the (non-)existence of the pinning_backends
|
||||
* column. We unfortunately have to get the result type for that... -
|
||||
* we can't use the result type determined by the function definition
|
||||
* without potentially crashing when somebody uses the old (or even
|
||||
* wrong) function definition though.
|
||||
*/
|
||||
if (get_call_result_type(fcinfo, NULL, &expected_tupledesc) != TYPEFUNC_COMPOSITE)
|
||||
elog(ERROR, "return type must be a row type");
|
||||
|
||||
if (expected_tupledesc->natts != NUM_ACCESS_STAT_COLUMNS)
|
||||
elog(ERROR, "incorrect number of output arguments");
|
||||
|
||||
/* Construct a tuple descriptor for the result rows. */
|
||||
tupledesc = CreateTemplateTupleDesc(expected_tupledesc->natts);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 1, "relfilenode",
|
||||
OIDOID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 2, "reltablespace",
|
||||
OIDOID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 3, "reldatabase",
|
||||
OIDOID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 4, "seqaccess",
|
||||
INT4OID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 5, "rndaccess",
|
||||
INT4OID, -1, 0);
|
||||
TupleDescInitEntry(tupledesc, (AttrNumber) 6, "accesscnt",
|
||||
INT8OID, -1, 0);
|
||||
|
||||
fctx->tupdesc = BlessTupleDesc(tupledesc);
|
||||
fctx->curr = dlist_is_empty(&lru) ? NULL : dlist_tail_node(&lru);
|
||||
|
||||
|
||||
/* Set max calls and remember the user function context. */
|
||||
funcctx->max_calls = hash->members;
|
||||
funcctx->user_fctx = fctx;
|
||||
|
||||
/* Return to original context when allocating transient memory */
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
|
||||
funcctx = SRF_PERCALL_SETUP();
|
||||
|
||||
/* Get the saved state */
|
||||
fctx = funcctx->user_fctx;
|
||||
if (fctx->curr)
|
||||
{
|
||||
AccessStatEntry* entry = dlist_container(AccessStatEntry, lru_node, fctx->curr);
|
||||
Datum values[NUM_ACCESS_STAT_COLUMNS];
|
||||
bool nulls[NUM_ACCESS_STAT_COLUMNS] = {
|
||||
false, false, false, false, false, false
|
||||
};
|
||||
|
||||
values[0] = ObjectIdGetDatum(entry->relnode.relNode);
|
||||
values[1] = ObjectIdGetDatum(entry->relnode.spcNode);
|
||||
values[2] = ObjectIdGetDatum(entry->relnode.dbNode);
|
||||
values[3] = Int32GetDatum(entry->n_seq_accesses);
|
||||
values[4] = Int32GetDatum(entry->n_rnd_accesses);
|
||||
values[5] = Int64GetDatum(entry->access_count);
|
||||
|
||||
/* Build and return the tuple. */
|
||||
tuple = heap_form_tuple(fctx->tupdesc, values, nulls);
|
||||
result = HeapTupleGetDatum(tuple);
|
||||
|
||||
fctx->curr = dlist_has_prev(&lru, fctx->curr) ? dlist_prev_node(&lru, fctx->curr) : NULL;
|
||||
|
||||
SRF_RETURN_NEXT(funcctx, result);
|
||||
}
|
||||
else
|
||||
SRF_RETURN_DONE(funcctx);
|
||||
}
|
||||
|
||||
@@ -58,6 +58,7 @@ char *neon_auth_token;
|
||||
int n_unflushed_requests = 0;
|
||||
int flush_every_n_requests = 8;
|
||||
int readahead_buffer_size = 128;
|
||||
int readahead_distance = 10;
|
||||
|
||||
bool (*old_redo_read_buffer_filter) (XLogReaderState *record, uint8 block_id) = NULL;
|
||||
|
||||
@@ -452,6 +453,18 @@ pg_init_libpagestore(void)
|
||||
PGC_USERSET,
|
||||
0, /* no flags required */
|
||||
NULL, (GucIntAssignHook) &readahead_buffer_resize, NULL);
|
||||
DefineCustomIntVariable("neon.readahead_distance",
|
||||
"Number of read-ahead blocks",
|
||||
NULL,
|
||||
&readahead_distance,
|
||||
10,
|
||||
0,
|
||||
INT_MAX,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
relsize_hash_init();
|
||||
|
||||
@@ -476,4 +489,5 @@ pg_init_libpagestore(void)
|
||||
redo_read_buffer_filter = neon_redo_read_buffer_filter;
|
||||
}
|
||||
lfc_init();
|
||||
access_stat_init();
|
||||
}
|
||||
|
||||
@@ -27,8 +27,18 @@ RETURNS SETOF RECORD
|
||||
AS 'MODULE_PATHNAME', 'local_cache_pages'
|
||||
LANGUAGE C PARALLEL SAFE;
|
||||
|
||||
-- Reports the access statistics kept in the calling backend's private memory.
-- It must not be PARALLEL SAFE: a parallel worker is a different process and
-- would report its own statistics instead of the leader's.
CREATE FUNCTION get_relation_access_statistics()
RETURNS SETOF RECORD
AS 'MODULE_PATHNAME', 'get_relation_access_statistics'
LANGUAGE C PARALLEL RESTRICTED;
|
||||
|
||||
-- Create a view for convenient access.
|
||||
CREATE VIEW local_cache AS
|
||||
SELECT P.* FROM local_cache_pages() AS P
|
||||
SELECT relname,P.* FROM local_cache_pages() AS P
|
||||
(pageoffs int8, relfilenode oid, reltablespace oid, reldatabase oid,
|
||||
relforknumber int2, relblocknumber int8, accesscount int4);
|
||||
relforknumber int2, relblocknumber int8, accesscount int4) JOIN pg_class pc ON (P.relfilenode = pc.relfilenode);
|
||||
|
||||
-- Access statistics of the current backend, labelled with relation names
-- looked up in pg_class by relfilenode.
CREATE VIEW relation_access_statistics AS
	SELECT relname, P.*
	FROM get_relation_access_statistics() AS P
		(relfilenode oid, reltablespace oid, reldatabase oid,
		 seqaccess int4, rndaccess int4, access_count int8)
	JOIN pg_class pc ON (P.relfilenode = pc.relfilenode);
|
||||
|
||||
@@ -157,6 +157,7 @@ extern page_server_api * page_server;
|
||||
extern char *page_server_connstring;
|
||||
extern int flush_every_n_requests;
|
||||
extern int readahead_buffer_size;
|
||||
extern int readahead_distance;
|
||||
extern bool seqscan_prefetch_enabled;
|
||||
extern int seqscan_prefetch_distance;
|
||||
extern char *neon_timeline;
|
||||
@@ -210,5 +211,8 @@ extern bool lfc_cache_contains(RelFileNode rnode, ForkNumber forkNum, BlockNumbe
|
||||
extern void lfc_evict(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
|
||||
extern void lfc_init(void);
|
||||
|
||||
/* Access statistic */
|
||||
extern void access_stat_init(void);
|
||||
extern bool is_sequential_access(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1881,6 +1881,7 @@ neon_read_at_lsn(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno,
|
||||
if (RecoveryInProgress() && !(MyBackendType == B_STARTUP))
|
||||
XLogWaitForReplayOf(request_lsn);
|
||||
|
||||
|
||||
/*
|
||||
* Try to find prefetched page in the list of received pages.
|
||||
*/
|
||||
@@ -2003,6 +2004,10 @@ neon_read(SMgrRelation reln, ForkNumber forkNum, BlockNumber blkno,
|
||||
elog(ERROR, "unknown relpersistence '%c'", reln->smgr_relpersistence);
|
||||
}
|
||||
|
||||
/* If it is expected to be sequential access then initiate prefetch of next block */
|
||||
if (is_sequential_access(reln->smgr_rnode.node, forkNum, blkno))
|
||||
neon_prefetch(reln, forkNum, blkno + readahead_distance);
|
||||
|
||||
/* Try to read from local file cache */
|
||||
if (lfc_read(reln->smgr_rnode.node, forkNum, blkno, buffer))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user