gci autocommit

Readme: add more rustup steps
Safekeeper: fix OpenAPI spec (#9066)
2026-02-03 02:30:37 +00:00 · 2024-09-20 14:46:19 +01:00 · 2024-09-20 14:44:15 +01:00 · 2024-09-20 12:00:05 +01:00 · 2024-09-20 12:38:42 +03:00
14 changed files with 47 additions and 166 deletions
--- a/README.md
+++ b/README.md
@@ -52,6 +52,11 @@ Building Neon requires 3.15+ version of `protoc` (protobuf-compiler). If your di
 ```
 # recommended approach from https://www.rust-lang.org/tools/install
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+# bash-only: add $HOME/.cargo/bin to PATH. The script does this for zsh
+# echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> ~/.bashrc
+
+rustup show
 ```

 #### Installing dependencies on macOS (12.3.1)
@@ -74,6 +79,11 @@ brew link --force m4
 ```
 # recommended approach from https://www.rust-lang.org/tools/install
 curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+# bash-only: add $HOME/.cargo/bin to PATH. The script does this for zsh
+echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> ~/.bashrc
+
+rustup show
 ```

 3. Install PostgreSQL Client
--- a/pgxn/neon/Makefile
+++ b/pgxn/neon/Makefile
@@ -23,7 +23,7 @@ SHLIB_LINK_INTERNAL = $(libpq)
 SHLIB_LINK = -lcurl

 EXTENSION = neon
-DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql neon--1.4--1.5.sql neon--1.5--1.4.sql
+DATA = neon--1.0.sql neon--1.0--1.1.sql neon--1.1--1.2.sql neon--1.2--1.3.sql neon--1.3--1.2.sql neon--1.2--1.1.sql neon--1.1--1.0.sql  neon--1.3--1.4.sql neon--1.4--1.3.sql
 PGFILEDESC = "neon - cloud storage for PostgreSQL"

 EXTRA_CLEAN = \
--- a/pgxn/neon/file_cache.c
+++ b/pgxn/neon/file_cache.c
@@ -1263,7 +1263,7 @@ approximate_working_set_size_seconds(PG_FUNCTION_ARGS)
 		int32 dc;
 		time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
 		LWLockAcquire(lfc_lock, LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, 1.0);
+		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration);
 		LWLockRelease(lfc_lock);
 		PG_RETURN_INT32(dc);
 	}
@@ -1280,7 +1280,7 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 		int32 dc;
 		bool reset = PG_GETARG_BOOL(0);
 		LWLockAcquire(lfc_lock, reset ? LW_EXCLUSIVE : LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1, 1.0);
+		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, (time_t)-1);
 		if (reset)
 			memset(lfc_ctl->wss_estimation.regs, 0, sizeof lfc_ctl->wss_estimation.regs);
 		LWLockRelease(lfc_lock);
@@ -1288,21 +1288,3 @@ approximate_working_set_size(PG_FUNCTION_ARGS)
 	}
 	PG_RETURN_NULL();
 }
-
-PG_FUNCTION_INFO_V1(approximate_optimal_cache_size);
-
-Datum
-approximate_optimal_cache_size(PG_FUNCTION_ARGS)
-{
-	if (lfc_size_limit != 0)
-	{
-		int32 dc;
-		time_t duration = PG_ARGISNULL(0) ? (time_t)-1 : PG_GETARG_INT32(0);
-		double min_hit_ratio = PG_ARGISNULL(1) ? 1.0 : PG_GETARG_FLOAT8(1);
-		LWLockAcquire(lfc_lock, LW_SHARED);
-		dc = (int32) estimateSHLL(&lfc_ctl->wss_estimation, duration, min_hit_ratio);
-		LWLockRelease(lfc_lock);
-		PG_RETURN_INT32(dc);
-	}
-	PG_RETURN_NULL();
-}
--- a/pgxn/neon/hll.c
+++ b/pgxn/neon/hll.c
@@ -6,7 +6,7 @@
 * Portions Copyright (c) 2014-2023, PostgreSQL Global Development Group
 *
 * Implements https://hal.science/hal-00465313/document
- *
+ * 
 * Based on Hideaki Ohno's C++ implementation.  This is probably not ideally
 * suited to estimating the cardinality of very large sets;  in particular, we
 * have not attempted to further optimize the implementation as described in
@@ -126,78 +126,22 @@ addSHLL(HyperLogLogState *cState, uint32 hash)
 	/* Compute the rank of the remaining 32 - "k" (registerWidth) bits */
 	count = rho(hash << HLL_BIT_WIDTH, HLL_C_BITS);

-	if (cState->regs[index][count].ts)
-	{
-		/* update histgoram */
-		int64_t delta = (now - cState->regs[index][count].ts)/USECS_PER_SEC;
-		uint32_t new_histogram[HIST_SIZE] = {0};
-		for (int i = 0; i < HIST_SIZE; i++) {
-			/* Use middle point of interval */
-			uint32 interval_log2 = pg_ceil_log2_32((delta + (HIST_MIN_INTERVAL*((1<<i) + ((1<<i)/2))/2)) / HIST_MIN_INTERVAL);
-			uint32 cell = Min(interval_log2, HIST_SIZE-1);
-			new_histogram[cell] += cState->regs[index][count].histogram[i];
-		}
-		memcpy(cState->regs[index][count].histogram, new_histogram, sizeof new_histogram);
-	}
-	cState->regs[index][count].ts = now;
-	cState->regs[index][count].histogram[0] += 1; // most recent access always goes to first histogram backet
-}
-
-static uint32_t
-getAccessCount(const HyperLogLogRegister* reg, time_t duration)
-{
-	uint32_t count = 0;
-//  Simplest solution is to take in account all points fro overlapped interval
-//	for (size_t i = 0; i < HIST_SIZE && HIST_MIN_INTERVAL*((1 << i)/2) <= duration; i++) {
-	for (size_t i = 0; i < HIST_SIZE; i++) {
-		uint32_t high_boundary = HIST_MIN_INTERVAL*(1 << i);
-		uint32_t low_boundary = HIST_MIN_INTERVAL*((1 << i)/2);
-		if (high_boundary >= duration) {
-			// Assume uniform distribution of points within interval and use proportional number of points
-			Assert(duration >= low_boundary);
-			count += reg->histogram[i] * (duration - low_boundary) / (high_boundary - low_boundary);
-			break; // it's last interval within specified time range
-		} else {
-			count += reg->histogram[i];
-		}
-	}
-	return count;
+	cState->regs[index][count] = now;
 }

 static uint8
-getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, double min_hit_ratio)
+getMaximum(const TimestampTz* reg, TimestampTz since)
 {
 	uint8 max = 0;
-	size_t i, j;
-	if (min_hit_ratio == 1.0)
+
+	for (size_t i = 0; i < HLL_C_BITS + 1; i++)
 	{
-		for (i = 0; i < HLL_C_BITS + 1; i++)
+		if (reg[i] >= since)
 		{
-			if (reg[i].ts >= since)
-			{
-				max = i;
-			}
-		}
-	}
-	else
-	{
-		uint32_t total_count = 0;
-		for (i = 0; i < HLL_C_BITS + 1; i++)
-		{
-			total_count += getAccessCount(&reg[i], duration);
-		}
-		if (total_count != 0)
-		{
-			for (i = 0; i < HLL_C_BITS + 1; i++)
-			{
-				// Take in account only bits with access frequncy exceeding maximal miss rate (1 - hit rate)
-				if (reg[i].ts >= since && 1.0 - (double)getAccessCount(&reg[i], duration) / total_count <= min_hit_ratio)
-				{
-					max = i;
-				}
-			}
+			max = i;
 		}
 	}
+
 	return max;
 }

@@ -206,7 +150,7 @@ getMaximum(const HyperLogLogRegister* reg, TimestampTz since, time_t duration, d
 * Estimates cardinality, based on elements added so far
 */
 double
-estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)
+estimateSHLL(HyperLogLogState *cState, time_t duration)
 {
 	double		result;
 	double		sum = 0.0;
@@ -217,7 +161,7 @@ estimateSHLL(HyperLogLogState *cState, time_t duration, double min_hit_ratio)

 	for (i = 0; i < HLL_N_REGISTERS; i++)
 	{
-		R[i] = getMaximum(cState->regs[i], since, duration, min_hit_ratio);
+		R[i] = getMaximum(cState->regs[i], since);
 		sum += 1.0 / pow(2.0, R[i]);
 	}

--- a/pgxn/neon/hll.h
+++ b/pgxn/neon/hll.h
@@ -53,14 +53,6 @@
 #define HLL_C_BITS      (32 - HLL_BIT_WIDTH)
 #define HLL_N_REGISTERS (1 << HLL_BIT_WIDTH)

-/*
- * Number of histogram cells. We use exponential histogram with first interval
- * equals to one minutes. Autoscaler request LFC  statistic with intervals 1,2,...,60 minutes
- * so 2^8=64 seems to be enough for our needs.
- */
-#define HIST_SIZE         8
-#define HIST_MIN_INTERVAL 60 /* seconds */
-
 /*
 * HyperLogLog is an approximate technique for computing the number of distinct
 * entries in a set.  Importantly, it does this by using a fixed amount of
@@ -77,21 +69,18 @@
 * modified timestamp >= the query timestamp. This value is the number of bits
 * for this register in the normal HLL calculation.
 *
- * The memory usage is 2^B * (C + 1) * sizeof(HyperLogLogRegister), or 920kiB.
+ * The memory usage is 2^B * (C + 1) * sizeof(TimestampTz), or 184kiB.
+ * Usage could be halved if we decide to reduce the required time dimension
+ * precision, since 32 bits at second precision should be enough for
+ * statistics. However, that is not yet implemented.
 */
-typedef struct
-{
-	TimestampTz ts; /* last access timestamp */
-	uint32_t    histogram[HIST_SIZE]; /* access counter exponential histogram */
-} HyperLogLogRegister;
-
 typedef struct HyperLogLogState
 {
-	HyperLogLogRegister regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
+	TimestampTz regs[HLL_N_REGISTERS][HLL_C_BITS + 1];
 } HyperLogLogState;

 extern void   initSHLL(HyperLogLogState *cState);
 extern void   addSHLL(HyperLogLogState *cState, uint32 hash);
-extern double estimateSHLL(HyperLogLogState *cState, time_t dutration, double min_hit_ratio);
+extern double estimateSHLL(HyperLogLogState *cState, time_t dutration);

 #endif
--- a/pgxn/neon/neon--1.4--1.5.sql
+++ b/pgxn/neon/neon--1.4--1.5.sql
@@ -1,10 +0,0 @@
-\echo Use "ALTER EXTENSION neon UPDATE TO '1.5'" to load this file. \quit
-
-- returns minimal LFC cache size (in 8kb pages) provided specified hit rate
-CREATE FUNCTION approximate_optimal_cache_size(duration_sec integer default null, min_hit_ration float8 default null)
-RETURNS integer
-AS 'MODULE_PATHNAME', 'approximate_optimal_cache_size'
-LANGUAGE C PARALLEL SAFE;
-
-GRANT EXECUTE ON FUNCTION approximate_optimal_cache_size(integer,float8) TO pg_monitor;
-
--- a/pgxn/neon/neon--1.5--1.4.sql
+++ b/pgxn/neon/neon--1.5--1.4.sql
@@ -1 +0,0 @@
-DROP FUNCTION IF EXISTS approximate_optimal_cache_size(integer,float8) CASCADE;
--- a/safekeeper/src/http/openapi_spec.yaml
+++ b/safekeeper/src/http/openapi_spec.yaml
@@ -1,7 +1,11 @@
 openapi: "3.0.2"
 info:
  title: Safekeeper control API
+  description: Neon Safekeeper API
  version: "1.0"
+  license:
+    name: "Apache"
+    url: https://github.com/neondatabase/neon/blob/main/LICENSE


 servers:
@@ -386,6 +390,12 @@ components:
        msg:
          type: string

+    NotFoundError:
+      type: object
+      properties:
+        msg:
+          type: string
+
  responses:

    #
--- a/test_runner/regress/test_lfc_working_set_approximation.py
+++ b/test_runner/regress/test_lfc_working_set_approximation.py
@@ -114,46 +114,3 @@ def test_sliding_working_set_approximation(neon_simple_env: NeonEnv):

    assert estimation_1k >= 20 and estimation_1k <= 40
    assert estimation_10k >= 200 and estimation_10k <= 400
-
-
-def test_optimal_cache_size_approximation(neon_simple_env: NeonEnv):
-    env = neon_simple_env
-
-    endpoint = env.endpoints.create_start(
-        branch_name="main",
-        config_lines=[
-            "autovacuum = off",
-            "shared_buffers=1MB",
-            "neon.max_file_cache_size=256MB",
-            "neon.file_cache_size_limit=245MB",
-        ],
-    )
-    conn = endpoint.connect()
-    cur = conn.cursor()
-    cur.execute("create extension neon version '1.5'")
-    cur.execute(
-        "create table t_huge(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
-    )
-    cur.execute(
-        "create table t_small(pk integer primary key, count integer default 0, payload text default repeat('?', 128))"
-    )
-    cur.execute(
-        "insert into t_huge(pk) values (generate_series(1,1000000))"
-    )  # table size is 21277 pages
-    cur.execute(
-        "insert into t_small(pk) values (generate_series(1,100000))"
-    )  # table size is 2128 pages
-    time.sleep(2)
-    before = time.monotonic()
-    for _ in range(100):
-        cur.execute("select sum(count) from t_small")
-    cur.execute("select sum(count) from t_huge")
-    after = time.monotonic()
-    cur.execute(f"select approximate_working_set_size_seconds({int(after - before + 1)})")
-    ws_estimation = cur.fetchall()[0][0]
-    log.info(f"Working set size estimaton {ws_estimation}")
-    cur.execute(f"select approximate_optimal_cache_size({int(after - before + 1)}, 0.99)")
-    optimal_cache_size = cur.fetchall()[0][0]
-    log.info(f"Optimal cache size for 99% hit rate {optimal_cache_size}")
-    assert ws_estimation >= 20000 and ws_estimation <= 30000
-    assert optimal_cache_size >= 2000 and optimal_cache_size <= 3000
--- a/vendor/postgres-v14
+++ b/vendor/postgres-v14
--- a/vendor/postgres-v15
+++ b/vendor/postgres-v15
--- a/vendor/postgres-v16
+++ b/vendor/postgres-v16
--- a/vendor/postgres-v17
+++ b/vendor/postgres-v17
--- a/vendor/revisions.json
+++ b/vendor/revisions.json
@@ -1,18 +1,18 @@
 {
  "v17": [
    "17rc1",
-    "9156d63ce253bed9d1f76355ceec610e444eaffa"
+    "5bbb9bd93dd805e90bd8af15d00080363d18ec68"
  ],
  "v16": [
    "16.4",
-    "0baa7346dfd42d61912eeca554c9bb0a190f0a1e"
+    "3ec6e2496f64c6fec35c67cb82efd6490a6a4738"
  ],
  "v15": [
    "15.8",
-    "6f6d77fb5960602fcd3fd130aca9f99ecb1619c9"
+    "72b904c0b3ac43bd74d1e8e6d772e2c476ae25b1"
  ],
  "v14": [
    "14.13",
-    "a317b9b5b96978b49e78986697f3dd80d06f99a7"
+    "87cb68f899db434cd6f1908cf0ac8fdeafdd88c1"
  ]
 }
Author	SHA1	Message	Date
James Broadhead	f79048f0ec	gci autocommit	2024-09-20 14:46:19 +01:00
James Broadhead	3476727923	Readme: add more rustup steps	2024-09-20 14:44:15 +01:00
Alexander Bayandin	3104f0f250	Safekeeper: fix OpenAPI spec (#9066 ) ## Problem Safekeeper's OpenAPI spec is incorrect: ``` Semantic error at paths./v1/tenant/{tenant_id}/timeline/{timeline_id}.get.responses.404.content.application/json.schema.$ref $refs must reference a valid location in the document Jump to line 126 ``` Checked on https://editor.swagger.io ## Summary of changes - Add `NotFoundError` - Add `description` and `license` fields to make Cloud OpenAPI spec linter happy	2024-09-20 12:00:05 +01:00
Arseny Sher	f2c08195f0	Bump vendor/postgres. Includes PRs: - ERROR out instead of segfaulting when walsender slots are full. - logical worker: respond to publisher even under dense stream.	2024-09-20 12:38:42 +03:00
				`@@ -1 +0,0 @@`
				`DROP FUNCTION IF EXISTS approximate_optimal_cache_size(integer,float8) CASCADE;`