mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-25 09:00:37 +00:00
Merge branch 'main' into erik/history-size-consumption-metric
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import urllib.parse
|
||||
from enum import StrEnum
|
||||
from typing import TYPE_CHECKING, final
|
||||
|
||||
import requests
|
||||
@@ -9,11 +10,23 @@ from requests.auth import AuthBase
|
||||
from typing_extensions import override
|
||||
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from requests import PreparedRequest
|
||||
|
||||
|
||||
COMPUTE_AUDIENCE = "compute"
|
||||
"""
|
||||
The value to place in the `aud` claim.
|
||||
"""
|
||||
|
||||
|
||||
@final
|
||||
class ComputeClaimsScope(StrEnum):
|
||||
ADMIN = "admin"
|
||||
|
||||
|
||||
@final
|
||||
class BearerAuth(AuthBase):
|
||||
"""
|
||||
@@ -50,6 +63,35 @@ class EndpointHttpClient(requests.Session):
|
||||
res.raise_for_status()
|
||||
return res.json()
|
||||
|
||||
def prewarm_lfc_status(self) -> dict[str, str]:
|
||||
res = self.get(f"http://localhost:{self.external_port}/lfc/prewarm")
|
||||
res.raise_for_status()
|
||||
json: dict[str, str] = res.json()
|
||||
return json
|
||||
|
||||
def prewarm_lfc(self):
|
||||
self.post(f"http://localhost:{self.external_port}/lfc/prewarm").raise_for_status()
|
||||
|
||||
def prewarmed():
|
||||
json = self.prewarm_lfc_status()
|
||||
status, err = json["status"], json.get("error")
|
||||
assert status == "completed", f"{status}, error {err}"
|
||||
|
||||
wait_until(prewarmed)
|
||||
|
||||
def offload_lfc(self):
|
||||
url = f"http://localhost:{self.external_port}/lfc/offload"
|
||||
self.post(url).raise_for_status()
|
||||
|
||||
def offloaded():
|
||||
res = self.get(url)
|
||||
res.raise_for_status()
|
||||
json = res.json()
|
||||
status, err = json["status"], json.get("error")
|
||||
assert status == "completed", f"{status}, error {err}"
|
||||
|
||||
wait_until(offloaded)
|
||||
|
||||
def database_schema(self, database: str):
|
||||
res = self.get(
|
||||
f"http://localhost:{self.external_port}/database_schema?database={urllib.parse.quote(database, safe='')}",
|
||||
|
||||
@@ -21,6 +21,7 @@ if TYPE_CHECKING:
|
||||
Any,
|
||||
)
|
||||
|
||||
from fixtures.endpoint.http import ComputeClaimsScope
|
||||
from fixtures.pg_version import PgVersion
|
||||
|
||||
|
||||
@@ -535,12 +536,16 @@ class NeonLocalCli(AbstractNeonCli):
|
||||
res.check_returncode()
|
||||
return res
|
||||
|
||||
def endpoint_generate_jwt(self, endpoint_id: str) -> str:
|
||||
def endpoint_generate_jwt(
|
||||
self, endpoint_id: str, scope: ComputeClaimsScope | None = None
|
||||
) -> str:
|
||||
"""
|
||||
Generate a JWT for making requests to the endpoint's external HTTP
|
||||
server.
|
||||
"""
|
||||
args = ["endpoint", "generate-jwt", endpoint_id]
|
||||
if scope:
|
||||
args += ["--scope", str(scope)]
|
||||
|
||||
cmd = self.raw_cli(args)
|
||||
cmd.check_returncode()
|
||||
|
||||
@@ -51,7 +51,7 @@ from fixtures.common_types import (
|
||||
TimelineId,
|
||||
)
|
||||
from fixtures.compute_migrations import NUM_COMPUTE_MIGRATIONS
|
||||
from fixtures.endpoint.http import EndpointHttpClient
|
||||
from fixtures.endpoint.http import ComputeClaimsScope, EndpointHttpClient
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import Metrics, MetricsGetter, parse_metrics
|
||||
from fixtures.neon_cli import NeonLocalCli, Pagectl
|
||||
@@ -1185,7 +1185,9 @@ class NeonEnv:
|
||||
"broker": {},
|
||||
"safekeepers": [],
|
||||
"pageservers": [],
|
||||
"endpoint_storage": {"port": self.port_distributor.get_port()},
|
||||
"endpoint_storage": {
|
||||
"listen_addr": f"127.0.0.1:{self.port_distributor.get_port()}",
|
||||
},
|
||||
"generate_local_ssl_certs": self.generate_local_ssl_certs,
|
||||
}
|
||||
|
||||
@@ -1279,7 +1281,8 @@ class NeonEnv:
|
||||
)
|
||||
|
||||
tenant_config = ps_cfg.setdefault("tenant_config", {})
|
||||
tenant_config["rel_size_v2_enabled"] = True # Enable relsize_v2 by default in tests
|
||||
# This feature is pending rollout.
|
||||
# tenant_config["rel_size_v2_enabled"] = True
|
||||
|
||||
if self.pageserver_remote_storage is not None:
|
||||
ps_cfg["remote_storage"] = remote_storage_to_toml_dict(
|
||||
@@ -4217,7 +4220,7 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
|
||||
self.config(config_lines)
|
||||
|
||||
self.__jwt = self.env.neon_cli.endpoint_generate_jwt(self.endpoint_id)
|
||||
self.__jwt = self.generate_jwt()
|
||||
|
||||
return self
|
||||
|
||||
@@ -4264,6 +4267,14 @@ class Endpoint(PgProtocol, LogUtils):
|
||||
|
||||
return self
|
||||
|
||||
def generate_jwt(self, scope: ComputeClaimsScope | None = None) -> str:
|
||||
"""
|
||||
Generate a JWT for making requests to the endpoint's external HTTP
|
||||
server.
|
||||
"""
|
||||
assert self.endpoint_id is not None
|
||||
return self.env.neon_cli.endpoint_generate_jwt(self.endpoint_id, scope)
|
||||
|
||||
def endpoint_path(self) -> Path:
|
||||
"""Path to endpoint directory"""
|
||||
assert self.endpoint_id
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import math # Add this import
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
@@ -87,7 +88,10 @@ def test_cumulative_statistics_persistence(
|
||||
- insert additional tuples that by itself are not enough to trigger auto-vacuum but in combination with the previous tuples are
|
||||
- verify that autovacuum is triggered by the combination of tuples inserted before and after endpoint suspension
|
||||
"""
|
||||
project = neon_api.create_project(pg_version)
|
||||
project = neon_api.create_project(
|
||||
pg_version,
|
||||
f"Test cumulative statistics persistence, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}",
|
||||
)
|
||||
project_id = project["project"]["id"]
|
||||
neon_api.wait_for_operation_to_finish(project_id)
|
||||
endpoint_id = project["endpoints"][0]["id"]
|
||||
|
||||
@@ -62,7 +62,9 @@ def test_ro_replica_lag(
|
||||
|
||||
pgbench_duration = f"-T{test_duration_min * 60 * 2}"
|
||||
|
||||
project = neon_api.create_project(pg_version)
|
||||
project = neon_api.create_project(
|
||||
pg_version, f"Test readonly replica lag, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}"
|
||||
)
|
||||
project_id = project["project"]["id"]
|
||||
log.info("Project ID: %s", project_id)
|
||||
log.info("Primary endpoint ID: %s", project["endpoints"][0]["id"])
|
||||
@@ -195,7 +197,9 @@ def test_replication_start_stop(
|
||||
pgbench_duration = f"-T{2**num_replicas * configuration_test_time_sec}"
|
||||
error_occurred = False
|
||||
|
||||
project = neon_api.create_project(pg_version)
|
||||
project = neon_api.create_project(
|
||||
pg_version, f"Test replication start stop, GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}"
|
||||
)
|
||||
project_id = project["project"]["id"]
|
||||
log.info("Project ID: %s", project_id)
|
||||
log.info("Primary endpoint ID: %s", project["endpoints"][0]["id"])
|
||||
|
||||
@@ -206,7 +206,7 @@ class NeonProject:
|
||||
self.neon_api = neon_api
|
||||
self.pg_bin = pg_bin
|
||||
proj = self.neon_api.create_project(
|
||||
pg_version, f"Automatic random API test {os.getenv('GITHUB_RUN_ID')}"
|
||||
pg_version, f"Automatic random API test GITHUB_RUN_ID={os.getenv('GITHUB_RUN_ID')}"
|
||||
)
|
||||
self.id: str = proj["project"]["id"]
|
||||
self.name: str = proj["project"]["name"]
|
||||
|
||||
@@ -186,7 +186,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"type": "interpreted",
|
||||
"args": {"format": "bincode", "compression": {"zstd": {"level": 1}}},
|
||||
},
|
||||
"rel_size_v2_enabled": False, # test suite enables it by default as of https://github.com/neondatabase/neon/issues/11081, so, custom config means disabling it
|
||||
"rel_size_v2_enabled": True,
|
||||
"gc_compaction_enabled": True,
|
||||
"gc_compaction_verification": False,
|
||||
"gc_compaction_initial_threshold_kb": 1024000,
|
||||
|
||||
@@ -202,6 +202,8 @@ def test_pageserver_gc_compaction_preempt(
|
||||
env = neon_env_builder.init_start(initial_tenant_conf=conf)
|
||||
|
||||
env.pageserver.allowed_errors.append(".*The timeline or pageserver is shutting down.*")
|
||||
env.pageserver.allowed_errors.append(".*flush task cancelled.*")
|
||||
env.pageserver.allowed_errors.append(".*failed to pipe.*")
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
@@ -229,7 +231,7 @@ def test_pageserver_gc_compaction_preempt(
|
||||
|
||||
|
||||
@skip_in_debug_build("only run with release build")
|
||||
@pytest.mark.timeout(600) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.timeout(900) # This test is slow with sanitizers enabled, especially on ARM
|
||||
@pytest.mark.parametrize(
|
||||
"with_branches",
|
||||
["with_branches", "no_branches"],
|
||||
|
||||
@@ -544,3 +544,69 @@ def test_drop_role_with_table_privileges_from_non_neon_superuser(neon_simple_env
|
||||
)
|
||||
role = cursor.fetchone()
|
||||
assert role is None
|
||||
|
||||
|
||||
def test_db_with_custom_settings(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test that compute_ctl can work with databases that have some custom settings.
|
||||
For example, role=some_other_role, default_transaction_read_only=on,
|
||||
search_path=non_public_schema, statement_timeout=1 (1ms).
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
TEST_ROLE = "some_other_role"
|
||||
TEST_DB = "db_with_custom_settings"
|
||||
TEST_SCHEMA = "non_public_schema"
|
||||
|
||||
endpoint.respec_deep(
|
||||
**{
|
||||
"spec": {
|
||||
"skip_pg_catalog_updates": False,
|
||||
"cluster": {
|
||||
"databases": [
|
||||
{
|
||||
"name": TEST_DB,
|
||||
"owner": TEST_ROLE,
|
||||
}
|
||||
],
|
||||
"roles": [
|
||||
{
|
||||
"name": TEST_ROLE,
|
||||
}
|
||||
],
|
||||
},
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
endpoint.reconfigure()
|
||||
|
||||
with endpoint.cursor(dbname=TEST_DB) as cursor:
|
||||
cursor.execute(f"CREATE SCHEMA {TEST_SCHEMA}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET role = {TEST_ROLE}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET default_transaction_read_only = on")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET search_path = {TEST_SCHEMA}")
|
||||
cursor.execute(f"ALTER DATABASE {TEST_DB} SET statement_timeout = 1")
|
||||
|
||||
with endpoint.cursor(dbname=TEST_DB) as cursor:
|
||||
cursor.execute("SELECT current_role")
|
||||
role = cursor.fetchone()
|
||||
assert role is not None
|
||||
assert role[0] == TEST_ROLE
|
||||
|
||||
cursor.execute("SHOW default_transaction_read_only")
|
||||
default_transaction_read_only = cursor.fetchone()
|
||||
assert default_transaction_read_only is not None
|
||||
assert default_transaction_read_only[0] == "on"
|
||||
|
||||
cursor.execute("SHOW search_path")
|
||||
search_path = cursor.fetchone()
|
||||
assert search_path is not None
|
||||
assert search_path[0] == TEST_SCHEMA
|
||||
|
||||
# Do not check statement_timeout, because we force it to 2min
|
||||
# in `endpoint.cursor()` fixture.
|
||||
|
||||
endpoint.reconfigure()
|
||||
|
||||
78
test_runner/regress/test_compute_http.py
Normal file
78
test_runner/regress/test_compute_http.py
Normal file
@@ -0,0 +1,78 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from http.client import FORBIDDEN, UNAUTHORIZED
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
import jwt
|
||||
import pytest
|
||||
from fixtures.endpoint.http import COMPUTE_AUDIENCE, ComputeClaimsScope, EndpointHttpClient
|
||||
from fixtures.utils import run_only_on_default_postgres
|
||||
from requests import RequestException
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
|
||||
@run_only_on_default_postgres("The code path being tested is not dependent on Postgres version")
|
||||
def test_compute_no_scope_claim(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Test that if the JWT scope is not admin and no compute_id is specified,
|
||||
the external HTTP server returns a 403 Forbidden error.
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
# Encode nothing in the token
|
||||
token = jwt.encode({}, env.auth_keys.priv, algorithm="EdDSA")
|
||||
|
||||
# Create an admin-scoped HTTP client
|
||||
client = EndpointHttpClient(
|
||||
external_port=endpoint.external_http_port,
|
||||
internal_port=endpoint.internal_http_port,
|
||||
jwt=token,
|
||||
)
|
||||
|
||||
try:
|
||||
client.status()
|
||||
pytest.fail("Exception should have been raised")
|
||||
except RequestException as e:
|
||||
assert e.response is not None
|
||||
assert e.response.status_code == FORBIDDEN
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"audience",
|
||||
(COMPUTE_AUDIENCE, "invalid", None),
|
||||
ids=["with_audience", "with_invalid_audience", "without_audience"],
|
||||
)
|
||||
@run_only_on_default_postgres("The code path being tested is not dependent on Postgres version")
|
||||
def test_compute_admin_scope_claim(neon_simple_env: NeonEnv, audience: str | None):
|
||||
"""
|
||||
Test that an admin-scoped JWT can access the compute's external HTTP server
|
||||
without the compute_id being specified in the claims.
|
||||
"""
|
||||
env = neon_simple_env
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
|
||||
data: dict[str, str | list[str]] = {"scope": str(ComputeClaimsScope.ADMIN)}
|
||||
if audience:
|
||||
data["aud"] = [audience]
|
||||
|
||||
token = jwt.encode(data, env.auth_keys.priv, algorithm="EdDSA")
|
||||
|
||||
# Create an admin-scoped HTTP client
|
||||
client = EndpointHttpClient(
|
||||
external_port=endpoint.external_http_port,
|
||||
internal_port=endpoint.internal_http_port,
|
||||
jwt=token,
|
||||
)
|
||||
|
||||
try:
|
||||
client.status()
|
||||
if audience != COMPUTE_AUDIENCE:
|
||||
pytest.fail("Exception should have been raised")
|
||||
except RequestException as e:
|
||||
assert e.response is not None
|
||||
assert e.response.status_code == UNAUTHORIZED
|
||||
@@ -4,10 +4,12 @@ import pytest
|
||||
from aiohttp import ClientSession
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.utils import run_only_on_default_postgres
|
||||
from jwcrypto import jwk, jwt
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@run_only_on_default_postgres("test doesn't use postgres")
|
||||
async def test_endpoint_storage_insert_retrieve_delete(neon_simple_env: NeonEnv):
|
||||
"""
|
||||
Inserts, retrieves, and deletes test file using a JWT token
|
||||
@@ -35,7 +37,6 @@ async def test_endpoint_storage_insert_retrieve_delete(neon_simple_env: NeonEnv)
|
||||
key = f"http://{base_url}/{tenant_id}/{timeline_id}/{endpoint_id}/key"
|
||||
headers = {"Authorization": f"Bearer {token}"}
|
||||
log.info(f"cache key url {key}")
|
||||
log.info(f"token {token}")
|
||||
|
||||
async with ClientSession(headers=headers) as session:
|
||||
async with session.get(key) as res:
|
||||
|
||||
28
test_runner/regress/test_gist.py
Normal file
28
test_runner/regress/test_gist.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
|
||||
|
||||
#
|
||||
# Test unlogged build for GIST index
|
||||
#
|
||||
def test_gist(neon_simple_env: NeonEnv):
|
||||
env = neon_simple_env
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
con = endpoint.connect()
|
||||
cur = con.cursor()
|
||||
iterations = 100
|
||||
|
||||
for _ in range(iterations):
|
||||
cur.execute(
|
||||
"CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off)"
|
||||
)
|
||||
cur.execute(
|
||||
"INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,1000) i"
|
||||
)
|
||||
cur.execute("CREATE INDEX gist_pvactst ON pvactst USING gist (p)")
|
||||
cur.execute("VACUUM pvactst")
|
||||
cur.execute("DROP TABLE pvactst")
|
||||
@@ -1,11 +1,24 @@
|
||||
import random
|
||||
import threading
|
||||
import time
|
||||
from enum import Enum
|
||||
|
||||
import pytest
|
||||
from fixtures.endpoint.http import EndpointHttpClient
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnv
|
||||
from fixtures.utils import USE_LFC
|
||||
from prometheus_client.parser import text_string_to_metric_families as prom_parse_impl
|
||||
|
||||
|
||||
class LfcQueryMethod(Enum):
|
||||
COMPUTE_CTL = False
|
||||
POSTGRES = True
|
||||
|
||||
|
||||
PREWARM_LABEL = "compute_ctl_lfc_prewarm_requests_total"
|
||||
OFFLOAD_LABEL = "compute_ctl_lfc_offload_requests_total"
|
||||
QUERY_OPTIONS = LfcQueryMethod.POSTGRES, LfcQueryMethod.COMPUTE_CTL
|
||||
|
||||
|
||||
def check_pinned_entries(cur):
|
||||
@@ -19,11 +32,20 @@ def check_pinned_entries(cur):
|
||||
assert n_pinned == 0
|
||||
|
||||
|
||||
def prom_parse(client: EndpointHttpClient) -> dict[str, float]:
|
||||
return {
|
||||
sample.name: sample.value
|
||||
for family in prom_parse_impl(client.metrics())
|
||||
for sample in family.samples
|
||||
if sample.name in (PREWARM_LABEL, OFFLOAD_LABEL)
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
def test_lfc_prewarm(neon_simple_env: NeonEnv):
|
||||
@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
|
||||
def test_lfc_prewarm(neon_simple_env: NeonEnv, query: LfcQueryMethod):
|
||||
env = neon_simple_env
|
||||
n_records = 1000000
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
@@ -34,30 +56,57 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv):
|
||||
"neon.file_cache_prewarm_limit=1000",
|
||||
],
|
||||
)
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon version '1.6'")
|
||||
cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
|
||||
cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
|
||||
cur.execute("select get_local_cache_state()")
|
||||
lfc_state = cur.fetchall()[0][0]
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("create extension neon version '1.6'")
|
||||
pg_cur.execute("create database lfc")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
log.info(f"Inserting {n_records} rows")
|
||||
lfc_cur.execute("create table t(pk integer primary key, payload text default repeat('?', 128))")
|
||||
lfc_cur.execute(f"insert into t (pk) values (generate_series(1,{n_records}))")
|
||||
log.info(f"Inserted {n_records} rows")
|
||||
|
||||
http_client = endpoint.http_client()
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
status = http_client.prewarm_lfc_status()
|
||||
assert status["status"] == "not_prewarmed"
|
||||
assert "error" not in status
|
||||
http_client.offload_lfc()
|
||||
assert http_client.prewarm_lfc_status()["status"] == "not_prewarmed"
|
||||
assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: 0}
|
||||
else:
|
||||
pg_cur.execute("select get_local_cache_state()")
|
||||
lfc_state = pg_cur.fetchall()[0][0]
|
||||
|
||||
endpoint.stop()
|
||||
endpoint.start()
|
||||
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
time.sleep(1) # wait until compute_ctl complete downgrade of extension to default version
|
||||
cur.execute("alter extension neon update to '1.6'")
|
||||
cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
# wait until compute_ctl completes downgrade of extension to default version
|
||||
time.sleep(1)
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("alter extension neon update to '1.6'")
|
||||
|
||||
cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
|
||||
lfc_used_pages = cur.fetchall()[0][0]
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
http_client.prewarm_lfc()
|
||||
else:
|
||||
pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
|
||||
pg_cur.execute("select lfc_value from neon_lfc_stats where lfc_key='file_cache_used_pages'")
|
||||
lfc_used_pages = pg_cur.fetchall()[0][0]
|
||||
log.info(f"Used LFC size: {lfc_used_pages}")
|
||||
cur.execute("select * from get_prewarm_info()")
|
||||
prewarm_info = cur.fetchall()[0]
|
||||
pg_cur.execute("select * from get_prewarm_info()")
|
||||
prewarm_info = pg_cur.fetchall()[0]
|
||||
log.info(f"Prewarm info: {prewarm_info}")
|
||||
log.info(f"Prewarm progress: {(prewarm_info[1] + prewarm_info[2]) * 100 // prewarm_info[0]}%")
|
||||
total, prewarmed, skipped, _ = prewarm_info
|
||||
progress = (prewarmed + skipped) * 100 // total
|
||||
log.info(f"Prewarm progress: {progress}%")
|
||||
|
||||
assert lfc_used_pages > 10000
|
||||
assert (
|
||||
@@ -66,18 +115,23 @@ def test_lfc_prewarm(neon_simple_env: NeonEnv):
|
||||
and prewarm_info[0] == prewarm_info[1] + prewarm_info[2]
|
||||
)
|
||||
|
||||
cur.execute("select sum(pk) from t")
|
||||
assert cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
||||
lfc_cur.execute("select sum(pk) from t")
|
||||
assert lfc_cur.fetchall()[0][0] == n_records * (n_records + 1) / 2
|
||||
|
||||
check_pinned_entries(cur)
|
||||
check_pinned_entries(pg_cur)
|
||||
|
||||
desired = {"status": "completed", "total": total, "prewarmed": prewarmed, "skipped": skipped}
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
assert http_client.prewarm_lfc_status() == desired
|
||||
assert prom_parse(http_client) == {OFFLOAD_LABEL: 0, PREWARM_LABEL: 1}
|
||||
|
||||
|
||||
@pytest.mark.skipif(not USE_LFC, reason="LFC is disabled, skipping")
|
||||
def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv):
|
||||
@pytest.mark.parametrize("query", QUERY_OPTIONS, ids=["postgres", "compute-ctl"])
|
||||
def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv, query: LfcQueryMethod):
|
||||
env = neon_simple_env
|
||||
n_records = 10000
|
||||
n_threads = 4
|
||||
|
||||
endpoint = env.endpoints.create_start(
|
||||
branch_name="main",
|
||||
config_lines=[
|
||||
@@ -87,40 +141,58 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv):
|
||||
"neon.file_cache_prewarm_limit=1000000",
|
||||
],
|
||||
)
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
cur.execute("create extension neon version '1.6'")
|
||||
cur.execute(
|
||||
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
pg_cur.execute("create extension neon version '1.6'")
|
||||
pg_cur.execute("CREATE DATABASE lfc")
|
||||
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
lfc_cur.execute(
|
||||
"create table accounts(id integer primary key, balance bigint default 0, payload text default repeat('?', 1000)) with (fillfactor=10)"
|
||||
)
|
||||
cur.execute(f"insert into accounts(id) values (generate_series(1,{n_records}))")
|
||||
cur.execute("select get_local_cache_state()")
|
||||
lfc_state = cur.fetchall()[0][0]
|
||||
log.info(f"Inserting {n_records} rows")
|
||||
lfc_cur.execute(f"insert into accounts(id) values (generate_series(1,{n_records}))")
|
||||
log.info(f"Inserted {n_records} rows")
|
||||
|
||||
http_client = endpoint.http_client()
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
http_client.offload_lfc()
|
||||
else:
|
||||
pg_cur.execute("select get_local_cache_state()")
|
||||
lfc_state = pg_cur.fetchall()[0][0]
|
||||
|
||||
running = True
|
||||
n_prewarms = 0
|
||||
|
||||
def workload():
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
lfc_conn = endpoint.connect(dbname="lfc")
|
||||
lfc_cur = lfc_conn.cursor()
|
||||
n_transfers = 0
|
||||
while running:
|
||||
src = random.randint(1, n_records)
|
||||
dst = random.randint(1, n_records)
|
||||
cur.execute("update accounts set balance=balance-100 where id=%s", (src,))
|
||||
cur.execute("update accounts set balance=balance+100 where id=%s", (dst,))
|
||||
lfc_cur.execute("update accounts set balance=balance-100 where id=%s", (src,))
|
||||
lfc_cur.execute("update accounts set balance=balance+100 where id=%s", (dst,))
|
||||
n_transfers += 1
|
||||
log.info(f"Number of transfers: {n_transfers}")
|
||||
|
||||
def prewarm():
|
||||
conn = endpoint.connect()
|
||||
cur = conn.cursor()
|
||||
n_prewarms = 0
|
||||
pg_conn = endpoint.connect()
|
||||
pg_cur = pg_conn.cursor()
|
||||
while running:
|
||||
cur.execute("alter system set neon.file_cache_size_limit='1MB'")
|
||||
cur.execute("select pg_reload_conf()")
|
||||
cur.execute("alter system set neon.file_cache_size_limit='1GB'")
|
||||
cur.execute("select pg_reload_conf()")
|
||||
cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
pg_cur.execute("alter system set neon.file_cache_size_limit='1MB'")
|
||||
pg_cur.execute("select pg_reload_conf()")
|
||||
pg_cur.execute("alter system set neon.file_cache_size_limit='1GB'")
|
||||
pg_cur.execute("select pg_reload_conf()")
|
||||
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
http_client.prewarm_lfc()
|
||||
else:
|
||||
pg_cur.execute("select prewarm_local_cache(%s)", (lfc_state,))
|
||||
|
||||
nonlocal n_prewarms
|
||||
n_prewarms += 1
|
||||
log.info(f"Number of prewarms: {n_prewarms}")
|
||||
|
||||
@@ -140,8 +212,10 @@ def test_lfc_prewarm_under_workload(neon_simple_env: NeonEnv):
|
||||
t.join()
|
||||
prewarm_thread.join()
|
||||
|
||||
cur.execute("select sum(balance) from accounts")
|
||||
total_balance = cur.fetchall()[0][0]
|
||||
lfc_cur.execute("select sum(balance) from accounts")
|
||||
total_balance = lfc_cur.fetchall()[0][0]
|
||||
assert total_balance == 0
|
||||
|
||||
check_pinned_entries(cur)
|
||||
check_pinned_entries(pg_cur)
|
||||
if query is LfcQueryMethod.COMPUTE_CTL:
|
||||
assert prom_parse(http_client) == {OFFLOAD_LABEL: 1, PREWARM_LABEL: n_prewarms}
|
||||
|
||||
@@ -506,7 +506,6 @@ class SyntheticSizeVerifier:
|
||||
|
||||
PER_METRIC_VERIFIERS = {
|
||||
"remote_storage_size": CannotVerifyAnything,
|
||||
"resident_size": CannotVerifyAnything,
|
||||
"written_size": WrittenDataVerifier,
|
||||
"written_data_bytes_delta": WrittenDataDeltaVerifier,
|
||||
"pitr_cutoff": CannotVerifyAnything,
|
||||
|
||||
@@ -471,7 +471,7 @@ def test_tx_abort_with_many_relations(
|
||||
try:
|
||||
# Rollback phase should be fast: this is one WAL record that we should process efficiently
|
||||
fut = exec.submit(rollback_and_wait)
|
||||
fut.result(timeout=15)
|
||||
fut.result(timeout=15 if reldir_type == "v1" else 30)
|
||||
except:
|
||||
exec.shutdown(wait=False, cancel_futures=True)
|
||||
raise
|
||||
|
||||
@@ -1334,6 +1334,13 @@ def test_sharding_split_failures(
|
||||
tenant_id, timeline_id, shard_count=initial_shard_count, placement_policy='{"Attached":1}'
|
||||
)
|
||||
|
||||
# Create bystander tenants with various shard counts. They should not be affected by the aborted
|
||||
# splits. Regression test for https://github.com/neondatabase/cloud/issues/28589.
|
||||
bystanders = {} # id → shard_count
|
||||
for bystander_shard_count in [1, 2, 4, 8]:
|
||||
id, _ = env.create_tenant(shard_count=bystander_shard_count)
|
||||
bystanders[id] = bystander_shard_count
|
||||
|
||||
env.storage_controller.allowed_errors.extend(
|
||||
[
|
||||
# All split failures log a warning when then enqueue the abort operation
|
||||
@@ -1394,6 +1401,8 @@ def test_sharding_split_failures(
|
||||
locations = ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||
for loc in locations:
|
||||
tenant_shard_id = TenantShardId.parse(loc[0])
|
||||
if tenant_shard_id.tenant_id != tenant_id:
|
||||
continue # skip bystanders
|
||||
log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
|
||||
assert tenant_shard_id.shard_count == initial_shard_count
|
||||
if loc[1]["mode"] == "Secondary":
|
||||
@@ -1414,6 +1423,8 @@ def test_sharding_split_failures(
|
||||
locations = ps.http_client().tenant_list_locations()["tenant_shards"]
|
||||
for loc in locations:
|
||||
tenant_shard_id = TenantShardId.parse(loc[0])
|
||||
if tenant_shard_id.tenant_id != tenant_id:
|
||||
continue # skip bystanders
|
||||
log.info(f"Shard {tenant_shard_id} seen on node {ps.id} in mode {loc[1]['mode']}")
|
||||
assert tenant_shard_id.shard_count == split_shard_count
|
||||
if loc[1]["mode"] == "Secondary":
|
||||
@@ -1496,6 +1507,12 @@ def test_sharding_split_failures(
|
||||
# the scheduler reaches an idle state
|
||||
env.storage_controller.reconcile_until_idle(timeout_secs=30)
|
||||
|
||||
# Check that all bystanders are still around.
|
||||
for bystander_id, bystander_shard_count in bystanders.items():
|
||||
response = env.storage_controller.tenant_describe(bystander_id)
|
||||
assert TenantId(response["tenant_id"]) == bystander_id
|
||||
assert len(response["shards"]) == bystander_shard_count
|
||||
|
||||
env.storage_controller.consistency_check()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user