Optionally return json from get_lsn_by_timestamp (#5608)

This does two things: first a minor refactor to not use HTTP/1.x style header names and also to not panic if some certain requests had no "Accept" header. As a second thing, it addresses the third bullet point from #3689: > Change `get_lsn_by_timestamp` API method to return LSN even if we only found commit before the specified timestamp. This is done by adding a version parameter to the `get_lsn_by_timestamp` API call and making its behaviour depend on the version number. Part of #3414 (but doesn't address it in its entirety). --------- Co-authored-by: Joonas Koivunen <joonas@neon.tech>
2026-01-05 20:42:54 +00:00 · 2023-10-25 18:46:34 +02:00
parent c155cc0c3f
commit a673e4e7a9
4 changed files with 132 additions and 21 deletions
--- a/pageserver/src/http/openapi_spec.yml
+++ b/pageserver/src/http/openapi_spec.yml
@@ -392,13 +392,19 @@ paths:
            type: string
            format: date-time
          description: A timestamp to get the LSN
+        - name: version
+          in: query
+          required: false
+          schema:
+            type: integer
+          description: The version of the endpoint to use
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
-                type: string
+                $ref: "#/components/schemas/LsnByTimestampResponse"
        "400":
          description: Error when no tenant id found in path, no timeline id or invalid timestamp
          content:
@@ -1384,6 +1390,19 @@ components:
          type: string
          format: hex

+    LsnByTimestampResponse:
+      type: object
+      required:
+        - lsn
+        - kind
+      properties:
+        lsn:
+          type: string
+          format: hex
+        kind:
+          type: string
+          enum: [past, present, future, nodata]
+
    Error:
      type: object
      required:
--- a/pageserver/src/http/routes.rs
+++ b/pageserver/src/http/routes.rs
@@ -8,7 +8,7 @@ use std::sync::Arc;
 use anyhow::{anyhow, Context, Result};
 use futures::TryFutureExt;
 use humantime::format_rfc3339;
-use hyper::header::CONTENT_TYPE;
+use hyper::header;
 use hyper::StatusCode;
 use hyper::{Body, Request, Response, Uri};
 use metrics::launch_timestamp::LaunchTimestamp;
@@ -17,6 +17,7 @@ use pageserver_api::models::{
    TenantLoadRequest, TenantLocationConfigRequest,
 };
 use remote_storage::GenericRemoteStorage;
+use serde_with::{serde_as, DisplayFromStr};
 use tenant_size_model::{SizeResult, StorageModel};
 use tokio_util::sync::CancellationToken;
 use tracing::*;
@@ -484,6 +485,8 @@ async fn get_lsn_by_timestamp_handler(
    let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?;
    check_permission(&request, Some(tenant_id))?;

+    let version: Option<u8> = parse_query_param(&request, "version")?;
+
    let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
    let timestamp_raw = must_get_query_param(&request, "timestamp")?;
    let timestamp = humantime::parse_rfc3339(&timestamp_raw)
@@ -495,13 +498,32 @@ async fn get_lsn_by_timestamp_handler(
    let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?;
    let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?;

-    let result = match result {
-        LsnForTimestamp::Present(lsn) => format!("{lsn}"),
-        LsnForTimestamp::Future(_lsn) => "future".into(),
-        LsnForTimestamp::Past(_lsn) => "past".into(),
-        LsnForTimestamp::NoData(_lsn) => "nodata".into(),
-    };
-    json_response(StatusCode::OK, result)
+    if version.unwrap_or(0) > 1 {
+        #[serde_as]
+        #[derive(serde::Serialize)]
+        struct Result {
+            #[serde_as(as = "DisplayFromStr")]
+            lsn: Lsn,
+            kind: &'static str,
+        }
+        let (lsn, kind) = match result {
+            LsnForTimestamp::Present(lsn) => (lsn, "present"),
+            LsnForTimestamp::Future(lsn) => (lsn, "future"),
+            LsnForTimestamp::Past(lsn) => (lsn, "past"),
+            LsnForTimestamp::NoData(lsn) => (lsn, "nodata"),
+        };
+        json_response(StatusCode::OK, Result { lsn, kind })
+    } else {
+        // FIXME: this is a temporary crutch not to break backwards compatibility
+        // See https://github.com/neondatabase/neon/pull/5608
+        let result = match result {
+            LsnForTimestamp::Present(lsn) => format!("{lsn}"),
+            LsnForTimestamp::Future(_lsn) => "future".into(),
+            LsnForTimestamp::Past(_lsn) => "past".into(),
+            LsnForTimestamp::NoData(_lsn) => "nodata".into(),
+        };
+        json_response(StatusCode::OK, result)
+    }
 }

 async fn get_timestamp_of_lsn_handler(
@@ -767,6 +789,10 @@ async fn tenant_size_handler(
        .map_err(ApiError::InternalServerError)?;

    let mut sizes = None;
+    let accepts_html = headers
+        .get(header::ACCEPT)
+        .map(|v| v == "text/html")
+        .unwrap_or_default();
    if !inputs_only.unwrap_or(false) {
        let storage_model = inputs
            .calculate_model()
@@ -774,11 +800,11 @@ async fn tenant_size_handler(
        let size = storage_model.calculate();

        // If request header expects html, return html
-        if headers["Accept"] == "text/html" {
+        if accepts_html {
            return synthetic_size_html_response(inputs, storage_model, size);
        }
        sizes = Some(size);
-    } else if headers["Accept"] == "text/html" {
+    } else if accepts_html {
        return Err(ApiError::BadRequest(anyhow!(
            "inputs_only parameter is incompatible with html output request"
        )));
@@ -929,7 +955,7 @@ fn synthetic_size_html_response(
 pub fn html_response(status: StatusCode, data: String) -> Result<Response<Body>, ApiError> {
    let response = Response::builder()
        .status(status)
-        .header(hyper::header::CONTENT_TYPE, "text/html")
+        .header(header::CONTENT_TYPE, "text/html")
        .body(Body::from(data.as_bytes().to_vec()))
        .map_err(|e| ApiError::InternalServerError(e.into()))?;
    Ok(response)
@@ -1310,7 +1336,7 @@ async fn getpage_at_lsn_handler(
        Result::<_, ApiError>::Ok(
            Response::builder()
                .status(StatusCode::OK)
-                .header(CONTENT_TYPE, "application/octet-stream")
+                .header(header::CONTENT_TYPE, "application/octet-stream")
                .body(hyper::Body::from(page))
                .unwrap(),
        )
--- a/test_runner/fixtures/pageserver/http.py
+++ b/test_runner/fixtures/pageserver/http.py
@@ -441,13 +441,13 @@ class PageserverHttpClient(requests.Session):
        assert res_json is None

    def timeline_get_lsn_by_timestamp(
-        self, tenant_id: TenantId, timeline_id: TimelineId, timestamp
+        self, tenant_id: TenantId, timeline_id: TimelineId, timestamp, version: int
    ):
        log.info(
            f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}"
        )
        res = self.get(
-            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}",
+            f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}&version={version}",
        )
        self.verbose_error(res)
        res_json = res.json()
--- a/test_runner/regress/test_lsn_mapping.py
+++ b/test_runner/regress/test_lsn_mapping.py
@@ -8,6 +8,71 @@ from fixtures.types import Lsn
 from fixtures.utils import query_scalar


+#
+# Test pageserver get_lsn_by_timestamp API
+#
+def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder):
+    env = neon_env_builder.init_start()
+
+    new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
+    endpoint_main = env.endpoints.create_start("test_lsn_mapping")
+    log.info("postgres is running on 'test_lsn_mapping' branch")
+
+    cur = endpoint_main.connect().cursor()
+    # Create table, and insert rows, each in a separate transaction
+    # Disable synchronous_commit to make this initialization go faster.
+    #
+    # Each row contains current insert LSN and the current timestamp, when
+    # the row was inserted.
+    cur.execute("SET synchronous_commit=off")
+    cur.execute("CREATE TABLE foo (x integer)")
+    tbl = []
+    for i in range(1000):
+        cur.execute("INSERT INTO foo VALUES(%s)", (i,))
+        # Get the timestamp at UTC
+        after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None)
+        tbl.append([i, after_timestamp])
+
+    # Execute one more transaction with synchronous_commit enabled, to flush
+    # all the previous transactions
+    cur.execute("SET synchronous_commit=on")
+    cur.execute("INSERT INTO foo VALUES (-1)")
+
+    # Wait until WAL is received by pageserver
+    wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
+
+    with env.pageserver.http_client() as client:
+        # Check edge cases: timestamp in the future
+        probe_timestamp = tbl[-1][1] + timedelta(hours=1)
+        result = client.timeline_get_lsn_by_timestamp(
+            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
+        )
+        assert result == "future"
+
+        # timestamp too the far history
+        probe_timestamp = tbl[0][1] - timedelta(hours=10)
+        result = client.timeline_get_lsn_by_timestamp(
+            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
+        )
+        assert result == "past"
+
+        # Probe a bunch of timestamps in the valid range
+        for i in range(1, len(tbl), 100):
+            probe_timestamp = tbl[i][1]
+            lsn = client.timeline_get_lsn_by_timestamp(
+                env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1
+            )
+            # Call get_lsn_by_timestamp to get the LSN
+            # Launch a new read-only node at that LSN, and check that only the rows
+            # that were supposed to be committed at that point in time are visible.
+            endpoint_here = env.endpoints.create_start(
+                branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
+            )
+            assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
+
+            endpoint_here.stop_and_destroy()
+
+
 #
 # Test pageserver get_lsn_by_timestamp API
 #
@@ -45,23 +110,24 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
        # Check edge cases: timestamp in the future
        probe_timestamp = tbl[-1][1] + timedelta(hours=1)
        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
+            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
        )
-        assert result == "future"
+        assert result["kind"] == "future"

        # timestamp too the far history
        probe_timestamp = tbl[0][1] - timedelta(hours=10)
        result = client.timeline_get_lsn_by_timestamp(
-            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
+            env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
        )
-        assert result == "past"
+        assert result["kind"] == "past"

        # Probe a bunch of timestamps in the valid range
        for i in range(1, len(tbl), 100):
            probe_timestamp = tbl[i][1]
-            lsn = client.timeline_get_lsn_by_timestamp(
-                env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z"
+            result = client.timeline_get_lsn_by_timestamp(
+                env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
            )
+            lsn = result["lsn"]
            # Call get_lsn_by_timestamp to get the LSN
            # Launch a new read-only node at that LSN, and check that only the rows
            # that were supposed to be committed at that point in time are visible.