mirror of
https://github.com/neondatabase/neon.git
synced 2026-05-31 12:00:42 +00:00
pageserver: heatmap uploads (#6050)
Dependency (commits inline): https://github.com/neondatabase/neon/pull/5842 ## Problem Secondary mode tenants need a manifest of what to download. Ultimately this will be some kind of heat-scored set of layers, but as a robust first step we will simply use the set of resident layers: secondary tenant locations will aim to match the on-disk content of the attached location. ## Summary of changes - Add heatmap types representing the remote structure - Add hooks to Tenant/Timeline for generating these heatmaps - Create a new `HeatmapUploader` type that is external to `Tenant`, and responsible for walking the list of attached tenants and scheduling heatmap uploads. Notes to reviewers: - Putting the logic for uploads (and later, secondary mode downloads) outside of `Tenant` is an opinionated choice, motivated by: - Enable future smarter scheduling of operations, e.g. uploading the stalest tenant first, rather than having all tenants compete for a fair semaphore on a first-come-first-served basis. Similarly for downloads, we may wish to schedule the tenants with the hottest un-downloaded layers first. - Enable accessing upload-related state without synchronization (it belongs to HeatmapUploader, rather than being some Mutex<>'d part of Tenant) - Avoid further expanding the scope of Tenant/Timeline types, which are already among the largest in the codebase - You might reasonably wonder how much of the uploader code could be a generic job manager thing. Probably some of it: but let's defer pulling that out until we have at least two users (perhaps secondary downloads will be the second one) to highlight which bits are really generic. Compromises: - Later, instead of using digests of heatmaps to decide whether anything changed, I would prefer to avoid walking the layers in tenants that don't have changes: tracking that will be a bit invasive, as it needs input from both remote_timeline_client and Layer.
This commit is contained in:
@@ -322,6 +322,10 @@ class PageserverHttpClient(requests.Session):
|
||||
self.verbose_error(res)
|
||||
return TenantConfig.from_json(res.json())
|
||||
|
||||
def tenant_heatmap_upload(self, tenant_id: TenantId):
|
||||
res = self.post(f"http://localhost:{self.port}/v1/tenant/{tenant_id}/heatmap_upload")
|
||||
self.verbose_error(res)
|
||||
|
||||
def set_tenant_config(self, tenant_id: TenantId, config: dict[str, Any]):
|
||||
assert "tenant_id" not in config.keys()
|
||||
res = self.put(
|
||||
|
||||
@@ -16,6 +16,7 @@ from fixtures.log_helper import log
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
|
||||
TIMELINE_INDEX_PART_FILE_NAME = "index_part.json"
|
||||
TENANT_HEATMAP_FILE_NAME = "heatmap-v1.json"
|
||||
|
||||
|
||||
@enum.unique
|
||||
@@ -133,6 +134,13 @@ class LocalFsStorage:
|
||||
with self.index_path(tenant_id, timeline_id).open("r") as f:
|
||||
return json.load(f)
|
||||
|
||||
def heatmap_path(self, tenant_id: TenantId) -> Path:
|
||||
return self.tenant_path(tenant_id) / TENANT_HEATMAP_FILE_NAME
|
||||
|
||||
def heatmap_content(self, tenant_id):
|
||||
with self.heatmap_path(tenant_id).open("r") as f:
|
||||
return json.load(f)
|
||||
|
||||
def to_toml_inline_table(self) -> str:
|
||||
rv = {
|
||||
"local_path": str(self.root),
|
||||
|
||||
@@ -163,6 +163,7 @@ def test_fully_custom_config(positive_env: NeonEnv):
|
||||
"gc_feedback": True,
|
||||
"gc_horizon": 23 * (1024 * 1024),
|
||||
"gc_period": "2h 13m",
|
||||
"heatmap_period": "10m",
|
||||
"image_creation_threshold": 7,
|
||||
"pitr_interval": "1m",
|
||||
"lagging_wal_timeout": "23m",
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Any, Dict, Optional
|
||||
import pytest
|
||||
from fixtures.log_helper import log
|
||||
from fixtures.neon_fixtures import NeonEnvBuilder, NeonPageserver
|
||||
from fixtures.remote_storage import RemoteStorageKind
|
||||
from fixtures.remote_storage import LocalFsStorage, RemoteStorageKind
|
||||
from fixtures.types import TenantId, TimelineId
|
||||
from fixtures.utils import wait_until
|
||||
from fixtures.workload import Workload
|
||||
@@ -330,3 +330,46 @@ def test_live_migration(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
workload.churn_rows(64, pageserver_b.id)
|
||||
workload.validate(pageserver_b.id)
|
||||
|
||||
|
||||
def test_heatmap_uploads(neon_env_builder: NeonEnvBuilder):
|
||||
"""
|
||||
Test the sequence of location states that are used in a live migration.
|
||||
"""
|
||||
env = neon_env_builder.init_start() # initial_tenant_conf=TENANT_CONF)
|
||||
assert isinstance(env.pageserver_remote_storage, LocalFsStorage)
|
||||
|
||||
tenant_id = env.initial_tenant
|
||||
timeline_id = env.initial_timeline
|
||||
|
||||
# Write some data so that we have some layers
|
||||
workload = Workload(env, tenant_id, timeline_id)
|
||||
workload.init(env.pageservers[0].id)
|
||||
|
||||
# Write some layers and upload a heatmap
|
||||
workload.write_rows(256, env.pageservers[0].id)
|
||||
env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
|
||||
|
||||
def validate_heatmap(heatmap):
|
||||
assert len(heatmap["timelines"]) == 1
|
||||
assert heatmap["timelines"][0]["timeline_id"] == str(timeline_id)
|
||||
assert len(heatmap["timelines"][0]["layers"]) > 0
|
||||
layers = heatmap["timelines"][0]["layers"]
|
||||
|
||||
# Each layer appears at most once
|
||||
assert len(set(layer["name"] for layer in layers)) == len(layers)
|
||||
|
||||
# Download and inspect the heatmap that the pageserver uploaded
|
||||
heatmap_first = env.pageserver_remote_storage.heatmap_content(tenant_id)
|
||||
log.info(f"Read back heatmap: {heatmap_first}")
|
||||
validate_heatmap(heatmap_first)
|
||||
|
||||
# Do some more I/O to generate more layers
|
||||
workload.churn_rows(64, env.pageservers[0].id)
|
||||
env.pageserver.http_client().tenant_heatmap_upload(tenant_id)
|
||||
|
||||
# Ensure that another heatmap upload includes the new layers
|
||||
heatmap_second = env.pageserver_remote_storage.heatmap_content(tenant_id)
|
||||
log.info(f"Read back heatmap: {heatmap_second}")
|
||||
assert heatmap_second != heatmap_first
|
||||
validate_heatmap(heatmap_second)
|
||||
|
||||
Reference in New Issue
Block a user