mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-22 15:41:15 +00:00
pageserver: use image consistent LSN for force image layer creation (#12547)
This is a no-op for the neon deployment * Introduce the concept image consistent lsn: of the largest LSN below which all pages have been redone successfully * Use the image consistent LSN for forced image layer creations * Optionally expose the image consistent LSN via the timeline describe HTTP endpoint * Add a sharded timeline describe endpoint to storcon --------- Co-authored-by: Chen Luo <chen.luo@databricks.com>
This commit is contained in:
@@ -2342,6 +2342,20 @@ class NeonStorageController(MetricsGetter, LogUtils):
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
# HADRON
|
||||
def tenant_timeline_describe(
|
||||
self,
|
||||
tenant_id: TenantId,
|
||||
timeline_id: TimelineId,
|
||||
):
|
||||
response = self.request(
|
||||
"GET",
|
||||
f"{self.api}/control/v1/tenant/{tenant_id}/timeline/{timeline_id}",
|
||||
headers=self.headers(TokenScope.ADMIN),
|
||||
)
|
||||
response.raise_for_status()
|
||||
return response.json()
|
||||
|
||||
def nodes(self):
|
||||
"""
|
||||
:return: list of {"id": ""}
|
||||
|
||||
@@ -960,9 +960,9 @@ def get_layer_map(env, tenant_shard_id, timeline_id, ps_id):
|
||||
return image_layer_count, delta_layer_count
|
||||
|
||||
|
||||
def test_image_creation_timeout(neon_env_builder: NeonEnvBuilder):
|
||||
def test_image_layer_force_creation_period(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Tests that page server can force creating new images if image creation timeout is enabled
|
||||
Tests that page server can force creating new images if image_layer_force_creation_period is enabled
|
||||
"""
|
||||
# use large knobs to disable L0 compaction/image creation except for the force image creation
|
||||
tenant_conf = {
|
||||
@@ -972,10 +972,10 @@ def test_image_creation_timeout(neon_env_builder: NeonEnvBuilder):
|
||||
"checkpoint_distance": 10 * 1024,
|
||||
"checkpoint_timeout": "1s",
|
||||
"image_layer_force_creation_period": "1s",
|
||||
# The lsn for forced image layer creations is calculated once every 10 minutes.
|
||||
# Hence, drive compaction manually such that the test doesn't compute it at the
|
||||
# wrong time.
|
||||
"compaction_period": "0s",
|
||||
"pitr_interval": "10s",
|
||||
"gc_period": "1s",
|
||||
"compaction_period": "1s",
|
||||
"lsn_lease_length": "1s",
|
||||
}
|
||||
|
||||
# consider every tenant large to run the image layer generation check more eagerly
|
||||
@@ -1018,4 +1018,69 @@ def test_image_creation_timeout(neon_env_builder: NeonEnvBuilder):
|
||||
)
|
||||
|
||||
|
||||
def test_image_consistent_lsn(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test the /v1/tenant/<tenant_id>/timeline/<timeline_id> endpoint and the computation of image_consistent_lsn
|
||||
"""
|
||||
# use large knobs to disable L0 compaction/image creation except for the force image creation
|
||||
tenant_conf = {
|
||||
"compaction_threshold": "100",
|
||||
"image_creation_threshold": "100",
|
||||
"image_layer_creation_check_threshold": "1",
|
||||
"checkpoint_distance": 10 * 1024,
|
||||
"checkpoint_timeout": "1s",
|
||||
"image_layer_force_creation_period": "1s",
|
||||
"pitr_interval": "10s",
|
||||
"gc_period": "1s",
|
||||
"compaction_period": "1s",
|
||||
"lsn_lease_length": "1s",
|
||||
}
|
||||
|
||||
neon_env_builder.num_pageservers = 2
|
||||
neon_env_builder.num_safekeepers = 1
|
||||
env = neon_env_builder.init_start(
|
||||
initial_tenant_conf=tenant_conf,
|
||||
initial_tenant_shard_count=4,
|
||||
initial_tenant_shard_stripe_size=1,
|
||||
)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
endpoint = env.endpoints.create_start("main")
|
||||
endpoint.safe_psql("CREATE TABLE foo (id INTEGER, val text)")
|
||||
for v in range(10):
|
||||
endpoint.safe_psql(
|
||||
f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))", log_query=False
|
||||
)
|
||||
|
||||
response = env.storage_controller.tenant_timeline_describe(tenant_id, timeline_id)
|
||||
shards = response["shards"]
|
||||
for shard in shards:
|
||||
assert shard["image_consistent_lsn"] is not None
|
||||
image_consistent_lsn = response["image_consistent_lsn"]
|
||||
assert image_consistent_lsn is not None
|
||||
|
||||
# do more writes and wait for image_consistent_lsn to advance
|
||||
for v in range(100):
|
||||
endpoint.safe_psql(
|
||||
f"INSERT INTO foo (id, val) VALUES ({v}, repeat('abcde{v:0>3}', 500))", log_query=False
|
||||
)
|
||||
|
||||
def check_image_consistent_lsn_advanced():
|
||||
response = env.storage_controller.tenant_timeline_describe(tenant_id, timeline_id)
|
||||
new_image_consistent_lsn = response["image_consistent_lsn"]
|
||||
shards = response["shards"]
|
||||
for shard in shards:
|
||||
print(f"shard {shard['tenant_id']} image_consistent_lsn{shard['image_consistent_lsn']}")
|
||||
assert new_image_consistent_lsn != image_consistent_lsn
|
||||
|
||||
wait_until(check_image_consistent_lsn_advanced)
|
||||
|
||||
endpoint.stop_and_destroy()
|
||||
|
||||
for ps in env.pageservers:
|
||||
ps.allowed_errors.append(".*created delta file of size.*larger than double of target.*")
|
||||
|
||||
|
||||
# END_HADRON
|
||||
|
||||
Reference in New Issue
Block a user