offload_lfc_interval_seconds in ComputeSpec (#12447)

- Add ComputeSpec flag `offload_lfc_interval_seconds` controlling
  whether LFC should be offloaded to endpoint storage. Default value
  (None) means "don't offload".
- Add glue code around it for `neon_local` and integration tests.
- Add an `autoprewarm` mode to `test_lfc_prewarm` that tests the
  `offload_lfc_interval_seconds` and `autoprewarm` flags in conjunction.
- Rename `compute_ctl_lfc_prewarm_requests_total` and
  `compute_ctl_lfc_offload_requests_total` to
  `compute_ctl_lfc_prewarms_total` and `compute_ctl_lfc_offloads_total`
  to reflect that we count prewarms and offloads, not `compute_ctl`
  requests for them.
  Don't count a request in the metrics if a prewarm/offload is already
  ongoing.

https://github.com/neondatabase/cloud/issues/19011
Resolves: https://github.com/neondatabase/cloud/issues/30770
This commit is contained in:
Mikhail
2025-07-04 19:49:57 +01:00
committed by GitHub
parent 3a44774227
commit 7ed4530618
13 changed files with 296 additions and 178 deletions

View File

@@ -57,6 +57,8 @@ class EndpointHttpClient(requests.Session):
self.auth = BearerAuth(jwt)
self.mount("http://", HTTPAdapter())
self.prewarm_url = f"http://localhost:{external_port}/lfc/prewarm"
self.offload_url = f"http://localhost:{external_port}/lfc/offload"
def dbs_and_roles(self):
res = self.get(f"http://localhost:{self.external_port}/dbs_and_roles", auth=self.auth)
@@ -64,33 +66,39 @@ class EndpointHttpClient(requests.Session):
return res.json()
def prewarm_lfc_status(self) -> dict[str, str]:
res = self.get(f"http://localhost:{self.external_port}/lfc/prewarm")
res = self.get(self.prewarm_url)
res.raise_for_status()
json: dict[str, str] = res.json()
return json
def prewarm_lfc(self, from_endpoint_id: str | None = None):
url: str = f"http://localhost:{self.external_port}/lfc/prewarm"
params = {"from_endpoint": from_endpoint_id} if from_endpoint_id else dict()
self.post(url, params=params).raise_for_status()
self.post(self.prewarm_url, params=params).raise_for_status()
self.prewarm_lfc_wait()
def prewarm_lfc_wait(self):
def prewarmed():
json = self.prewarm_lfc_status()
status, err = json["status"], json.get("error")
assert status == "completed", f"{status}, error {err}"
assert status == "completed", f"{status}, {err=}"
wait_until(prewarmed, timeout=60)
def offload_lfc(self):
url = f"http://localhost:{self.external_port}/lfc/offload"
self.post(url).raise_for_status()
def offload_lfc_status(self) -> dict[str, str]:
res = self.get(self.offload_url)
res.raise_for_status()
json: dict[str, str] = res.json()
return json
def offload_lfc(self):
self.post(self.offload_url).raise_for_status()
self.offload_lfc_wait()
def offload_lfc_wait(self):
def offloaded():
res = self.get(url)
res.raise_for_status()
json = res.json()
json = self.offload_lfc_status()
status, err = json["status"], json.get("error")
assert status == "completed", f"{status}, error {err}"
assert status == "completed", f"{status}, {err=}"
wait_until(offloaded)