Leverage the existing mechanism to retry 404 errors instead of implementing new code. (#12567)

## Problem
In https://github.com/neondatabase/neon/pull/12513, new code was added
to retry 404 errors caused by replication lag. However, that change
introduced separate retry logic, making the script more complicated,
even though a retry mechanism already exists in `neon_api.py`.
## Summary of changes
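The existing retry mechanism in `neon_api.py` is now used to retry 404 errors (opt-in via a new `retry404` parameter), and the separate retry loop is removed.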

---------

Co-authored-by: Alexey Masterov <alexey.masterov@databricks.com>
This commit is contained in:
a-masterov
2025-07-14 15:25:25 +02:00
committed by GitHub
parent eb830fa547
commit 4fedcbc0ac
2 changed files with 16 additions and 25 deletions

View File

@@ -34,7 +34,9 @@ class NeonAPI:
self.retries524 = 0
self.retries4xx = 0
def __request(self, method: str | bytes, endpoint: str, **kwargs: Any) -> requests.Response:
def __request(
self, method: str | bytes, endpoint: str, retry404: bool = False, **kwargs: Any
) -> requests.Response:
kwargs["headers"] = kwargs.get("headers", {})
kwargs["headers"]["Authorization"] = f"Bearer {self.__neon_api_key}"
@@ -55,10 +57,12 @@ class NeonAPI:
resp.raise_for_status()
break
elif resp.status_code >= 400:
if resp.status_code == 422:
if resp.json()["message"] == "branch not ready yet":
retry = True
self.retries4xx += 1
if resp.status_code == 404 and retry404:
retry = True
self.retries4xx += 1
elif resp.status_code == 422 and resp.json()["message"] == "branch not ready yet":
retry = True
self.retries4xx += 1
elif resp.status_code == 423 and resp.json()["message"] in {
"endpoint is in some transitive state, could not suspend",
"project already has running conflicting operations, scheduling of new ones is prohibited",
@@ -66,7 +70,7 @@ class NeonAPI:
retry = True
self.retries4xx += 1
elif resp.status_code == 524:
log.info("The request was timed out, trying to get operations")
log.info("The request was timed out")
retry = True
self.retries524 += 1
if retry:
@@ -203,6 +207,9 @@ class NeonAPI:
resp = self.__request(
"GET",
f"/projects/{project_id}/branches/{branch_id}",
# XXX Retry get parent details to work around the issue
# https://databricks.atlassian.net/browse/LKB-279
retry404=True,
headers={
"Accept": "application/json",
},

View File

@@ -13,7 +13,6 @@ from typing import TYPE_CHECKING, Any
import pytest
from fixtures.log_helper import log
from requests import HTTPError
if TYPE_CHECKING:
from pathlib import Path
@@ -153,26 +152,11 @@ class NeonBranch:
return
self.updated_at = datetime.fromisoformat(res["branch"]["updated_at"])
parent_id: str = res["branch"]["parent_id"]
# XXX Retry get parent details to work around the issue
# https://databricks.atlassian.net/browse/LKB-279
target_time = datetime.now() + timedelta(seconds=30)
while datetime.now() < target_time:
try:
parent_def = self.neon_api.get_branch_details(self.project_id, parent_id)
except HTTPError as he:
if he.response.status_code == 404:
log.info("Branch not found, waiting...")
time.sleep(1)
else:
raise HTTPError(he) from he
else:
break
else:
raise RuntimeError(f"Branch {parent_id} not found")
# Creates an object for the parent branch
# After the reset operation a new parent branch is created
parent = NeonBranch(self.project, parent_def, True)
parent = NeonBranch(
self.project, self.neon_api.get_branch_details(self.project_id, parent_id), True
)
self.project.branches[parent_id] = parent
self.parent = parent
parent.children[self.id] = self