diff --git a/pgxn/neon/file_cache.c b/pgxn/neon/file_cache.c index 6d1e15123a..a6aa381975 100644 --- a/pgxn/neon/file_cache.c +++ b/pgxn/neon/file_cache.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include @@ -59,12 +60,17 @@ * 2. Improve access locality, subsequent pages will be allocated together improving seqscan speed */ #define BLOCKS_PER_CHUNK 128 /* 1Mb chunk */ +#define CHUNK_SIZE (BLOCKS_PER_CHUNK * BLCKSZ) #define MB ((uint64)1024*1024) +#ifndef MADV_REMOVE +#define MADV_REMOVE MADV_FREE /* macOS has no MADV_REMOVE; on Linux MADV_FREE works only for MAP_PRIVATE mappings */ +#endif + #define SIZE_MB_TO_CHUNKS(size) ((uint32)((size) * MB / BLCKSZ / BLOCKS_PER_CHUNK)) #define MAX_MONITOR_INTERVAL_USEC 1000000 /* 1 second */ -#define MAX_DISK_WRITE_RATE 1000 /* MB/sec */ +#define MAX_MEM_WRITE_RATE 10000 /* MB/sec */ typedef struct FileCacheEntry { @@ -83,12 +89,12 @@ typedef struct FileCacheControl } FileCacheControl; static HTAB* lfc_hash; -static int lfc_desc; static LWLockId lfc_lock; static int lfc_max_size; static int lfc_size_limit; static int lfc_free_space_watermark; static int lfc_free_memory_watermark; +static char* lfc_base_addr; static char* lfc_path; static FileCacheControl* lfc_ctl; static shmem_startup_hook_type prev_shmem_startup_hook; @@ -145,7 +151,7 @@ lfc_shmem_startup(void) LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); - lfc_ctl = (FileCacheControl*)ShmemInitStruct("lfc", sizeof(FileCacheControl), &found); + lfc_ctl = (FileCacheControl*)ShmemInitStruct("lfc", sizeof(FileCacheControl) + lfc_max_size*MB + CHUNK_SIZE, &found); if (!found) { uint32 lfc_size = SIZE_MB_TO_CHUNKS(lfc_max_size); @@ -160,10 +166,10 @@ lfc_shmem_startup(void) lfc_ctl->size = 0; lfc_ctl->used = 0; dlist_init(&lfc_ctl->lru); - - /* Remove file cache on restart */ - (void)unlink(lfc_path); } + lfc_base_addr = (char*)TYPEALIGN(CHUNK_SIZE, lfc_ctl+1); + if (!found && madvise(lfc_base_addr, lfc_max_size*MB, MADV_REMOVE) < 0) /* report a failed release instead of ignoring it, as lfc_change_limit_hook does */ + elog(LOG, "Failed to release file cache memory: %m");
LWLockRelease(AddinShmemInitLock); } @@ -175,7 +181,7 @@ lfc_shmem_request(void) prev_shmem_request_hook(); #endif - RequestAddinShmemSpace(sizeof(FileCacheControl) + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size)+1, sizeof(FileCacheEntry))); + RequestAddinShmemSpace(sizeof(FileCacheControl) + lfc_max_size*MB + CHUNK_SIZE + hash_estimate_size(SIZE_MB_TO_CHUNKS(lfc_max_size)+1, sizeof(FileCacheEntry))); RequestNamedLWLockTranche("lfc_lock", 1); } @@ -201,26 +207,14 @@ lfc_change_limit_hook(int newval, void *extra) if (!lfc_ctl || !UsedShmemSegAddr || IsParallelWorker()) return; - /* Open cache file if not done yet */ - if (lfc_desc == 0) - { - lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT); - if (lfc_desc < 0) { - elog(WARNING, "Failed to open file cache %s: %m, disabling file cache", lfc_path); - lfc_size_limit = 0; /* disable file cache */ - return; - } - } LWLockAcquire(lfc_lock, LW_EXCLUSIVE); while (new_size < lfc_ctl->used && !dlist_is_empty(&lfc_ctl->lru)) { /* Shrink cache by throwing away least recently accessed chunks and returning their space to file system */ FileCacheEntry* victim = dlist_container(FileCacheEntry, lru_node, dlist_pop_head_node(&lfc_ctl->lru)); Assert(victim->access_count == 0); -#ifdef FALLOC_FL_PUNCH_HOLE - if (fallocate(lfc_desc, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, (off_t)victim->offset*BLOCKS_PER_CHUNK*BLCKSZ, BLOCKS_PER_CHUNK*BLCKSZ) < 0) - elog(LOG, "Failed to punch hole in file: %m"); -#endif + if (madvise(lfc_base_addr + (size_t)victim->offset*CHUNK_SIZE, CHUNK_SIZE, MADV_REMOVE) < 0) /* widen before multiplying so the byte offset cannot wrap for caches of 4GB and more; same cast as in lfc_read/lfc_write */ + elog(LOG, "Failed to punch hole in memory: %m"); hash_search(lfc_hash, &victim->key, HASH_REMOVE, NULL); lfc_ctl->used -= 1; } @@ -248,9 +242,9 @@ FileCacheMonitorMain(Datum main_arg) { /* * Choose file system state monitor interval so that space can not be exosted - * during this period but not longer than MAX_MONITOR_INTERVAL (10 sec) + * during this period but not longer than MAX_MONITOR_INTERVAL_USEC (1 sec) */ - uint64 monitor_interval =
Min(MAX_MONITOR_INTERVAL_USEC, lfc_free_space_watermark*MB/MAX_DISK_WRITE_RATE); + uint64 monitor_interval = lfc_free_memory_watermark > 0 ? Min(MAX_MONITOR_INTERVAL_USEC, lfc_free_memory_watermark*MB/MAX_MEM_WRITE_RATE) : MAX_MONITOR_INTERVAL_USEC; /* neon.free_space_watermark is removed by this change, so derive the interval from the memory watermark; use the maximum when the watermark is 0 so the interval is never zero */ /* Establish signal handlers. */ pqsignal(SIGUSR1, procsignal_sigusr1_handler); @@ -261,21 +255,9 @@ FileCacheMonitorMain(Datum main_arg) /* Periodically dump buffers until terminated. */ while (!ShutdownRequestPending) { - if (lfc_size_limit != 0) + if (lfc_size_limit != 0 && lfc_free_memory_watermark != 0) { - bool shrink_cache = false; - if (lfc_free_space_watermark != 0) - { - struct statvfs sfs; - if (statvfs(lfc_path, &sfs) < 0) - elog(WARNING, "Failed to obtain status of %s: %m", lfc_path); - else - shrink_cache |= sfs.f_bavail*sfs.f_bsize < lfc_free_space_watermark*MB; - } - if (lfc_free_memory_watermark != 0) - shrink_cache |= get_available_memory() < lfc_free_memory_watermark*MB; - - if (shrink_cache) + if (get_available_memory() < lfc_free_memory_watermark*MB) { if (lfc_shrinking_factor < 31) { lfc_shrinking_factor += 1; @@ -317,6 +299,18 @@ lfc_init(void) if (!process_shared_preload_libraries_in_progress) elog(ERROR, "Neon module should be loaded via shared_preload_libraries"); + /* TODO: left only for compatibility with on-disk cache */ + DefineCustomStringVariable("neon.file_cache_path", + "Path to local file cache (can be raw device)", + NULL, + &lfc_path, + "file.cache", + PGC_POSTMASTER, + 0, + NULL, + NULL, + NULL); + DefineCustomIntVariable("neon.max_file_cache_size", "Maximal size of Neon local file cache", NULL, @@ -343,19 +337,6 @@ lfc_init(void) lfc_change_limit_hook, NULL); - DefineCustomIntVariable("neon.free_space_watermark", - "Minimal free space in local file system after reaching which local file cache will be truncated", - NULL, - &lfc_free_space_watermark, - 1024, /* 1GB */ - 0, - INT_MAX, - PGC_SIGHUP, - GUC_UNIT_MB, - NULL, - NULL, - NULL); - DefineCustomIntVariable("neon.free_memory_watermark", "Minimal free memory in system after reaching which
local file cache will be truncated", NULL, @@ -369,17 +350,6 @@ lfc_init(void) NULL, NULL); - DefineCustomStringVariable("neon.file_cache_path", - "Path to local file cache (can be raw device)", - NULL, - &lfc_path, - "file.cache", - PGC_POSTMASTER, - 0, - NULL, - NULL, - NULL); - if (lfc_max_size == 0) return; @@ -528,27 +498,7 @@ lfc_read(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, dlist_delete(&entry->lru_node); LWLockRelease(lfc_lock); - /* Open cache file if not done yet */ - if (lfc_desc == 0) - { - lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT); - if (lfc_desc < 0) { - elog(LOG, "Failed to open file cache %s: %m", lfc_path); - lfc_size_limit = 0; /* disable file cache */ - result = false; - } - } - - if (lfc_desc > 0) - { - rc = pread(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ); - if (rc != BLCKSZ) - { - elog(INFO, "Failed to read file cache: %m"); - lfc_size_limit = 0; /* disable file cache */ - result = false; - } - } + memcpy(buffer, lfc_base_addr + ((size_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ, BLCKSZ); /* Place entry to the head of LRU list */ LWLockAcquire(lfc_lock, LW_EXCLUSIVE); @@ -621,24 +571,8 @@ lfc_write(RelFileNode rnode, ForkNumber forkNum, BlockNumber blkno, } LWLockRelease(lfc_lock); - /* Open cache file if not done yet */ - if (lfc_desc == 0) - { - lfc_desc = BasicOpenFile(lfc_path, O_RDWR|O_CREAT); - if (lfc_desc < 0) { - elog(WARNING, "Failed to open file cache %s: %m, disabling file cache", lfc_path); - lfc_size_limit = 0; /* disable file cache */ - } - } - if (lfc_desc > 0) - { - rc = pwrite(lfc_desc, buffer, BLCKSZ, ((off_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ); - if (rc != BLCKSZ) - { - elog(WARNING, "Failed to write file cache: %m, disabling file cache"); - lfc_size_limit = 0; /* disable file cache */ - } - } + memcpy(lfc_base_addr + ((size_t)entry->offset*BLOCKS_PER_CHUNK + chunk_offs)*BLCKSZ, buffer, BLCKSZ); + /* Place entry to the head of 
LRU list */ LWLockAcquire(lfc_lock, LW_EXCLUSIVE); Assert(entry->access_count > 0);