From 4fedcbc0ac94d399808384911b92f8417b74c286 Mon Sep 17 00:00:00 2001 From: a-masterov <72613290+a-masterov@users.noreply.github.com> Date: Mon, 14 Jul 2025 15:25:25 +0200 Subject: [PATCH] Leverage the existing mechanism to retry 404 errors instead of implementing new code. (#12567) ## Problem In https://github.com/neondatabase/neon/pull/12513, new code was added to retry 404 errors caused by replication lag. However, that change added separate retry logic, making the script more complicated, even though an existing retry mechanism is already available in `neon_api.py`. ## Summary of changes The existing mechanism in `neon_api.py` is now used to retry 404 errors. --------- Co-authored-by: Alexey Masterov --- test_runner/fixtures/neon_api.py | 19 +++++++++++++------ test_runner/random_ops/test_random_ops.py | 22 +++------------------- 2 files changed, 16 insertions(+), 25 deletions(-) diff --git a/test_runner/fixtures/neon_api.py b/test_runner/fixtures/neon_api.py index 9d85b9a332..e0f16abe77 100644 --- a/test_runner/fixtures/neon_api.py +++ b/test_runner/fixtures/neon_api.py @@ -34,7 +34,9 @@ class NeonAPI: self.retries524 = 0 self.retries4xx = 0 - def __request(self, method: str | bytes, endpoint: str, **kwargs: Any) -> requests.Response: + def __request( + self, method: str | bytes, endpoint: str, retry404: bool = False, **kwargs: Any + ) -> requests.Response: kwargs["headers"] = kwargs.get("headers", {}) kwargs["headers"]["Authorization"] = f"Bearer {self.__neon_api_key}" @@ -55,10 +57,12 @@ class NeonAPI: resp.raise_for_status() break elif resp.status_code >= 400: - if resp.status_code == 422: - if resp.json()["message"] == "branch not ready yet": - retry = True - self.retries4xx += 1 + if resp.status_code == 404 and retry404: + retry = True + self.retries4xx += 1 + elif resp.status_code == 422 and resp.json()["message"] == "branch not ready yet": + retry = True + self.retries4xx += 1 elif resp.status_code == 423 and resp.json()["message"] in { "endpoint is in some transitive state, could not suspend", 
"project already has running conflicting operations, scheduling of new ones is prohibited", @@ -66,7 +70,7 @@ class NeonAPI: retry = True self.retries4xx += 1 elif resp.status_code == 524: - log.info("The request was timed out, trying to get operations") + log.info("The request was timed out") retry = True self.retries524 += 1 if retry: @@ -203,6 +207,9 @@ class NeonAPI: resp = self.__request( "GET", f"/projects/{project_id}/branches/{branch_id}", + # XXX Retry get parent details to work around the issue + # https://databricks.atlassian.net/browse/LKB-279 + retry404=True, headers={ "Accept": "application/json", }, diff --git a/test_runner/random_ops/test_random_ops.py b/test_runner/random_ops/test_random_ops.py index 5c43b06bc5..b106e9b729 100644 --- a/test_runner/random_ops/test_random_ops.py +++ b/test_runner/random_ops/test_random_ops.py @@ -13,7 +13,6 @@ from typing import TYPE_CHECKING, Any import pytest from fixtures.log_helper import log -from requests import HTTPError if TYPE_CHECKING: from pathlib import Path @@ -153,26 +152,11 @@ class NeonBranch: return self.updated_at = datetime.fromisoformat(res["branch"]["updated_at"]) parent_id: str = res["branch"]["parent_id"] - # XXX Retry get parent details to work around the issue - # https://databricks.atlassian.net/browse/LKB-279 - target_time = datetime.now() + timedelta(seconds=30) - while datetime.now() < target_time: - try: - parent_def = self.neon_api.get_branch_details(self.project_id, parent_id) - except HTTPError as he: - if he.response.status_code == 404: - log.info("Branch not found, waiting...") - time.sleep(1) - else: - raise HTTPError(he) from he - else: - break - else: - raise RuntimeError(f"Branch {parent_id} not found") - # Creates an object for the parent branch # After the reset operation a new parent branch is created - parent = NeonBranch(self.project, parent_def, True) + parent = NeonBranch( + self.project, self.neon_api.get_branch_details(self.project_id, parent_id), True + ) 
self.project.branches[parent_id] = parent self.parent = parent parent.children[self.id] = self