mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-26 09:30:37 +00:00
Release 2023 01 31 (#3497)
Co-authored-by: Kirill Bulatov <kirill@neon.tech> Co-authored-by: Heikki Linnakangas <heikki@neon.tech> Co-authored-by: Anastasia Lubennikova <anastasia@neon.tech> Co-authored-by: bojanserafimov <bojan.serafimov7@gmail.com> Co-authored-by: Christian Schwarz <christian@neon.tech> Co-authored-by: Alexey Kondratov <kondratov.aleksey@gmail.com> Co-authored-by: Joonas Koivunen <joonas@neon.tech> Co-authored-by: Konstantin Knizhnik <knizhnik@garret.ru> Co-authored-by: Shany Pozin <shany@neon.tech> Co-authored-by: Sergey Melnikov <sergey@neon.tech> Co-authored-by: Dmitry Rodionov <dmitry@neon.tech> Co-authored-by: Rory de Zoete <33318916+zoete@users.noreply.github.com> Co-authored-by: Rory de Zoete <rdezoete@Rorys-Mac-Studio.fritz.box> Co-authored-by: Rory de Zoete <rdezoete@RorysMacStudio.fritz.box> Co-authored-by: Lassi Pölönen <lassi.polonen@iki.fi>
This commit is contained in:
@@ -46,6 +46,12 @@ PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_remote_physical_size",
|
||||
)
|
||||
|
||||
PAGESERVER_GLOBAL_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_storage_operations_seconds_global_count",
|
||||
"pageserver_storage_operations_seconds_global_sum",
|
||||
"pageserver_storage_operations_seconds_global_bucket",
|
||||
)
|
||||
|
||||
PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_current_logical_size",
|
||||
"pageserver_resident_physical_size",
|
||||
@@ -61,13 +67,13 @@ PAGESERVER_PER_TENANT_METRICS: Tuple[str, ...] = (
|
||||
"pageserver_smgr_query_seconds_bucket",
|
||||
"pageserver_smgr_query_seconds_count",
|
||||
"pageserver_smgr_query_seconds_sum",
|
||||
"pageserver_storage_operations_seconds_bucket",
|
||||
"pageserver_storage_operations_seconds_count",
|
||||
"pageserver_storage_operations_seconds_sum",
|
||||
"pageserver_storage_operations_seconds_count_total",
|
||||
"pageserver_storage_operations_seconds_sum_total",
|
||||
"pageserver_wait_lsn_seconds_bucket",
|
||||
"pageserver_wait_lsn_seconds_count",
|
||||
"pageserver_wait_lsn_seconds_sum",
|
||||
"pageserver_created_persistent_files_total",
|
||||
"pageserver_written_persistent_bytes_total",
|
||||
"pageserver_tenant_states_count",
|
||||
*PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
|
||||
)
|
||||
|
||||
@@ -2,7 +2,15 @@ from contextlib import closing
|
||||
|
||||
import psycopg2.extras
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder
|
||||
from fixtures.neon_fixtures import (
|
||||
LocalFsStorage,
|
||||
NeonEnvBuilder,
|
||||
RemoteStorageKind,
|
||||
assert_tenant_status,
|
||||
wait_for_upload,
|
||||
)
|
||||
from fixtures.types import Lsn
|
||||
from fixtures.utils import wait_until
|
||||
|
||||
|
||||
def test_tenant_config(neon_env_builder: NeonEnvBuilder):
|
||||
@@ -57,7 +65,7 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}"""
|
||||
"compaction_period": 20,
|
||||
"compaction_threshold": 10,
|
||||
"gc_horizon": 67108864,
|
||||
"gc_period": 100,
|
||||
"gc_period": 60 * 60,
|
||||
"image_creation_threshold": 3,
|
||||
"pitr_interval": 604800, # 7 days
|
||||
}.items()
|
||||
@@ -158,3 +166,46 @@ tenant_config={checkpoint_distance = 10000, compaction_target_size = 1048576}"""
|
||||
"pitr_interval": 60,
|
||||
}.items()
|
||||
)
|
||||
|
||||
|
||||
def test_creating_tenant_conf_after_attach(neon_env_builder: NeonEnvBuilder):
|
||||
neon_env_builder.enable_remote_storage(
|
||||
remote_storage_kind=RemoteStorageKind.LOCAL_FS,
|
||||
test_name="test_creating_tenant_conf_after_attach",
|
||||
)
|
||||
|
||||
env = neon_env_builder.init_start()
|
||||
assert isinstance(env.remote_storage, LocalFsStorage)
|
||||
|
||||
# tenant is created with defaults, as in without config file
|
||||
(tenant_id, timeline_id) = env.neon_cli.create_tenant()
|
||||
config_path = env.repo_dir / "tenants" / str(tenant_id) / "config"
|
||||
assert config_path.exists(), "config file is always initially created"
|
||||
|
||||
http_client = env.pageserver.http_client()
|
||||
|
||||
detail = http_client.timeline_detail(tenant_id, timeline_id)
|
||||
last_record_lsn = Lsn(detail["last_record_lsn"])
|
||||
assert last_record_lsn.lsn_int != 0, "initdb must have executed"
|
||||
|
||||
wait_for_upload(http_client, tenant_id, timeline_id, last_record_lsn)
|
||||
|
||||
http_client.tenant_detach(tenant_id)
|
||||
|
||||
assert not config_path.exists(), "detach did not remove config file"
|
||||
|
||||
http_client.tenant_attach(tenant_id)
|
||||
wait_until(
|
||||
number_of_iterations=5,
|
||||
interval=1,
|
||||
func=lambda: assert_tenant_status(http_client, tenant_id, "Active"),
|
||||
)
|
||||
|
||||
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "1000000"})
|
||||
contents_first = config_path.read_text()
|
||||
env.neon_cli.config_tenant(tenant_id, {"gc_horizon": "0"})
|
||||
contents_later = config_path.read_text()
|
||||
|
||||
# dont test applying the setting here, we have that another test case to show it
|
||||
# we just care about being able to create the file
|
||||
assert len(contents_first) > len(contents_later)
|
||||
|
||||
@@ -6,6 +6,7 @@ from threading import Thread
|
||||
import asyncpg
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import parse_metrics
|
||||
from fixtures.neon_fixtures import (
|
||||
NeonEnv,
|
||||
NeonEnvBuilder,
|
||||
@@ -59,11 +60,11 @@ def test_tenant_reattach(
|
||||
# create new nenant
|
||||
tenant_id, timeline_id = env.neon_cli.create_tenant()
|
||||
|
||||
pg = env.postgres.create_start("main", tenant_id=tenant_id)
|
||||
with pg.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
cur.execute("CREATE TABLE t(key int primary key, value text)")
|
||||
cur.execute("INSERT INTO t SELECT generate_series(1,100000), 'payload'")
|
||||
current_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_flush_lsn()"))
|
||||
|
||||
# Wait for the all data to be processed by the pageserver and uploaded in remote storage
|
||||
wait_for_last_record_lsn(pageserver_http, tenant_id, timeline_id, current_lsn)
|
||||
@@ -78,15 +79,34 @@ def test_tenant_reattach(
|
||||
".*failed to perform remote task UploadMetadata.*, will retry.*"
|
||||
)
|
||||
|
||||
ps_metrics = parse_metrics(pageserver_http.get_metrics(), "pageserver")
|
||||
tenant_metric_filter = {
|
||||
"tenant_id": str(tenant_id),
|
||||
"timeline_id": str(timeline_id),
|
||||
}
|
||||
pageserver_last_record_lsn_before_detach = int(
|
||||
ps_metrics.query_one("pageserver_last_record_lsn", filter=tenant_metric_filter).value
|
||||
)
|
||||
|
||||
pageserver_http.tenant_detach(tenant_id)
|
||||
pageserver_http.tenant_attach(tenant_id)
|
||||
|
||||
with pg.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
|
||||
time.sleep(1) # for metrics propagation
|
||||
|
||||
# Check that we had to retry the downloads
|
||||
assert env.pageserver.log_contains(".*list prefixes.*failed, will retry.*")
|
||||
assert env.pageserver.log_contains(".*download.*failed, will retry.*")
|
||||
ps_metrics = parse_metrics(pageserver_http.get_metrics(), "pageserver")
|
||||
pageserver_last_record_lsn = int(
|
||||
ps_metrics.query_one("pageserver_last_record_lsn", filter=tenant_metric_filter).value
|
||||
)
|
||||
|
||||
assert pageserver_last_record_lsn_before_detach == pageserver_last_record_lsn
|
||||
|
||||
with env.postgres.create_start("main", tenant_id=tenant_id) as pg:
|
||||
with pg.cursor() as cur:
|
||||
assert query_scalar(cur, "SELECT count(*) FROM t") == 100000
|
||||
|
||||
# Check that we had to retry the downloads
|
||||
assert env.pageserver.log_contains(".*list prefixes.*failed, will retry.*")
|
||||
assert env.pageserver.log_contains(".*download.*failed, will retry.*")
|
||||
|
||||
|
||||
num_connections = 10
|
||||
@@ -237,7 +257,7 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
pageserver_http = env.pageserver.http_client()
|
||||
|
||||
env.pageserver.allowed_errors.append(".*NotFound\\(Tenant .* not found")
|
||||
env.pageserver.allowed_errors.append(".*NotFound: Tenant .* not found")
|
||||
|
||||
# first check for non existing tenant
|
||||
tenant_id = TenantId.generate()
|
||||
@@ -272,8 +292,10 @@ def test_tenant_detach_smoke(neon_env_builder: NeonEnvBuilder):
|
||||
bogus_timeline_id = TimelineId.generate()
|
||||
pageserver_http.timeline_gc(tenant_id, bogus_timeline_id, 0)
|
||||
|
||||
# the error will be printed to the log too
|
||||
# the error will be printed to the log too
|
||||
env.pageserver.allowed_errors.append(".*gc target timeline does not exist.*")
|
||||
# Timelines get stopped during detach, ignore the gc calls that error, whitnessing that
|
||||
env.pageserver.allowed_errors.append(".*InternalServerError\\(timeline is Stopping.*")
|
||||
|
||||
# Detach while running manual GC.
|
||||
# It should wait for manual GC to finish because it runs in a task associated with the tenant.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import os
|
||||
import shutil
|
||||
import time
|
||||
from contextlib import closing
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
@@ -8,6 +9,7 @@ from typing import List
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.metrics import (
|
||||
PAGESERVER_GLOBAL_METRICS,
|
||||
PAGESERVER_PER_TENANT_METRICS,
|
||||
PAGESERVER_PER_TENANT_REMOTE_TIMELINE_CLIENT_METRICS,
|
||||
parse_metrics,
|
||||
@@ -160,6 +162,14 @@ def test_metrics_normal_work(neon_env_builder: NeonEnvBuilder):
|
||||
f"process_start_time_seconds (UTC): {datetime.fromtimestamp(metrics.query_one('process_start_time_seconds').value)}"
|
||||
)
|
||||
|
||||
# Test (a subset of) pageserver global metrics
|
||||
for metric in PAGESERVER_GLOBAL_METRICS:
|
||||
ps_samples = ps_metrics.query_all(metric, {})
|
||||
assert len(ps_samples) > 0
|
||||
for sample in ps_samples:
|
||||
labels = ",".join([f'{key}="{value}"' for key, value in sample.labels.items()])
|
||||
log.info(f"{sample.name}{{{labels}}} {sample.value}")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"remote_storage_kind",
|
||||
@@ -259,7 +269,7 @@ def test_pageserver_with_empty_tenants(
|
||||
files_in_timelines_dir == 0
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} should have an empty timelines/ directory"
|
||||
|
||||
# Trigger timeline reinitialization after pageserver restart
|
||||
# Trigger timeline re-initialization after pageserver restart
|
||||
env.postgres.stop_all()
|
||||
env.pageserver.stop()
|
||||
|
||||
@@ -278,7 +288,51 @@ def test_pageserver_with_empty_tenants(
|
||||
broken_tenant["state"] == "Broken"
|
||||
), f"Tenant {tenant_without_timelines_dir} without timelines dir should be broken"
|
||||
|
||||
broken_tenant_status = client.tenant_status(tenant_without_timelines_dir)
|
||||
assert (
|
||||
broken_tenant_status["state"] == "Broken"
|
||||
), f"Tenant {tenant_without_timelines_dir} without timelines dir should be broken"
|
||||
|
||||
assert env.pageserver.log_contains(".*Setting tenant as Broken state, reason:.*")
|
||||
|
||||
[loaded_tenant] = [t for t in tenants if t["id"] == str(tenant_with_empty_timelines_dir)]
|
||||
assert (
|
||||
loaded_tenant["state"] == "Active"
|
||||
), "Tenant {tenant_with_empty_timelines_dir} with empty timelines dir should be active and ready for timeline creation"
|
||||
|
||||
loaded_tenant_status = client.tenant_status(tenant_with_empty_timelines_dir)
|
||||
assert (
|
||||
loaded_tenant_status["state"] == "Active"
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} without timelines dir should be active"
|
||||
|
||||
time.sleep(1) # to allow metrics propagation
|
||||
|
||||
ps_metrics = parse_metrics(client.get_metrics(), "pageserver")
|
||||
broken_tenants_metric_filter = {
|
||||
"tenant_id": str(tenant_without_timelines_dir),
|
||||
"state": "broken",
|
||||
}
|
||||
active_tenants_metric_filter = {
|
||||
"tenant_id": str(tenant_with_empty_timelines_dir),
|
||||
"state": "active",
|
||||
}
|
||||
|
||||
tenant_active_count = int(
|
||||
ps_metrics.query_one(
|
||||
"pageserver_tenant_states_count", filter=active_tenants_metric_filter
|
||||
).value
|
||||
)
|
||||
|
||||
assert (
|
||||
tenant_active_count == 1
|
||||
), f"Tenant {tenant_with_empty_timelines_dir} should have metric as active"
|
||||
|
||||
tenant_broken_count = int(
|
||||
ps_metrics.query_one(
|
||||
"pageserver_tenant_states_count", filter=broken_tenants_metric_filter
|
||||
).value
|
||||
)
|
||||
|
||||
assert (
|
||||
tenant_broken_count == 1
|
||||
), f"Tenant {tenant_without_timelines_dir} should have metric as broken"
|
||||
|
||||
Reference in New Issue
Block a user