Pass through wait_for_upload (renamed to wait_until_uploaded), better upload scheduling

Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
Alex Chi Z
2024-11-21 14:11:46 -05:00
parent 95474cfbe0
commit 499105da6d
7 changed files with 85 additions and 27 deletions

View File

@@ -4947,6 +4947,7 @@ def last_flush_lsn_upload(
timeline_id: TimelineId,
pageserver_id: int | None = None,
auth_token: str | None = None,
wait_until_uploaded: bool = True,
) -> Lsn:
"""
Wait for pageserver to catch up to the latest flush LSN of the given endpoint,
@@ -4960,7 +4961,9 @@ def last_flush_lsn_upload(
for tenant_shard_id, pageserver in shards:
ps_http = pageserver.http_client(auth_token=auth_token)
wait_for_last_record_lsn(ps_http, tenant_shard_id, timeline_id, last_flush_lsn)
ps_http.timeline_checkpoint(tenant_shard_id, timeline_id, wait_until_uploaded=True)
ps_http.timeline_checkpoint(
tenant_shard_id, timeline_id, wait_until_uploaded=wait_until_uploaded
)
return last_flush_lsn
@@ -4985,7 +4988,7 @@ def generate_uploads_and_deletions(
timeline_id: TimelineId | None = None,
data: str | None = None,
pageserver: NeonPageserver,
wait_for_upload: bool = True,
wait_until_uploaded: bool = True,
):
"""
Using the environment's default tenant + timeline, generate a load pattern
@@ -5008,7 +5011,12 @@ def generate_uploads_and_deletions(
if init:
endpoint.safe_psql("CREATE TABLE foo (id INTEGER PRIMARY KEY, val text)")
last_flush_lsn_upload(
env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver.id
env,
endpoint,
tenant_id,
timeline_id,
pageserver_id=pageserver.id,
wait_until_uploaded=wait_until_uploaded,
)
def churn(data):
@@ -5031,7 +5039,12 @@ def generate_uploads_and_deletions(
# in a state where there are "future layers" in remote storage that will generate deletions
# after a restart.
last_flush_lsn_upload(
env, endpoint, tenant_id, timeline_id, pageserver_id=pageserver.id
env,
endpoint,
tenant_id,
timeline_id,
pageserver_id=pageserver.id,
wait_until_uploaded=wait_until_uploaded,
)
# Compaction should generate some GC-eligible layers
@@ -5047,4 +5060,4 @@ def generate_uploads_and_deletions(
# background ingest, no more uploads pending, and therefore no non-determinism
# in subsequent actions like pageserver restarts.
flush_ep_to_pageserver(env, endpoint, tenant_id, timeline_id, pageserver.id)
ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=wait_for_upload)
ps_http.timeline_checkpoint(tenant_id, timeline_id, wait_until_uploaded=wait_until_uploaded)

View File

@@ -794,7 +794,7 @@ class PageserverHttpClient(requests.Session, MetricsGetter):
if compact is not None:
query["compact"] = "true" if compact else "false"
log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}")
log.info(f"Requesting checkpoint: tenant {tenant_id}, timeline {timeline_id}, wait_until_uploaded={wait_until_uploaded}")
res = self.put(
f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/checkpoint",
params=query,

View File

@@ -462,7 +462,7 @@ def test_emergency_mode(neon_env_builder: NeonEnvBuilder, pg_bin: PgBin):
# Because it is in emergency mode, it will not attempt to validate deletions required by the initial barrier, and therefore
# other files cannot be uploaded because it is still waiting for the initial barrier to be validated.
generate_uploads_and_deletions(
env, init=False, pageserver=env.pageserver, wait_for_upload=False
env, init=False, pageserver=env.pageserver, wait_until_uploaded=False
)
# The pageserver should neither validate nor execute any deletions, it should have