diff --git a/pageserver/src/http/openapi_spec.yml b/pageserver/src/http/openapi_spec.yml index f0bf2666a7..e8421d619d 100644 --- a/pageserver/src/http/openapi_spec.yml +++ b/pageserver/src/http/openapi_spec.yml @@ -392,13 +392,19 @@ paths: type: string format: date-time description: A timestamp to get the LSN + - name: version + in: query + required: false + schema: + type: integer + description: The version of the endpoint to use responses: "200": description: OK content: application/json: schema: - type: string + $ref: "#/components/schemas/LsnByTimestampResponse" "400": description: Error when no tenant id found in path, no timeline id or invalid timestamp content: @@ -1384,6 +1390,19 @@ components: type: string format: hex + LsnByTimestampResponse: + type: object + required: + - lsn + - kind + properties: + lsn: + type: string + format: hex + kind: + type: string + enum: [past, present, future, nodata] + Error: type: object required: diff --git a/pageserver/src/http/routes.rs b/pageserver/src/http/routes.rs index 142e85d203..54c5cdf133 100644 --- a/pageserver/src/http/routes.rs +++ b/pageserver/src/http/routes.rs @@ -8,7 +8,7 @@ use std::sync::Arc; use anyhow::{anyhow, Context, Result}; use futures::TryFutureExt; use humantime::format_rfc3339; -use hyper::header::CONTENT_TYPE; +use hyper::header; use hyper::StatusCode; use hyper::{Body, Request, Response, Uri}; use metrics::launch_timestamp::LaunchTimestamp; @@ -17,6 +17,7 @@ use pageserver_api::models::{ TenantLoadRequest, TenantLocationConfigRequest, }; use remote_storage::GenericRemoteStorage; +use serde_with::{serde_as, DisplayFromStr}; use tenant_size_model::{SizeResult, StorageModel}; use tokio_util::sync::CancellationToken; use tracing::*; @@ -484,6 +485,8 @@ async fn get_lsn_by_timestamp_handler( let tenant_id: TenantId = parse_request_param(&request, "tenant_id")?; check_permission(&request, Some(tenant_id))?; + let version: Option = parse_query_param(&request, "version")?; + let timeline_id: 
TimelineId = parse_request_param(&request, "timeline_id")?; let timestamp_raw = must_get_query_param(&request, "timestamp")?; let timestamp = humantime::parse_rfc3339(&timestamp_raw) @@ -495,13 +498,32 @@ async fn get_lsn_by_timestamp_handler( let timeline = active_timeline_of_active_tenant(tenant_id, timeline_id).await?; let result = timeline.find_lsn_for_timestamp(timestamp_pg, &ctx).await?; - let result = match result { - LsnForTimestamp::Present(lsn) => format!("{lsn}"), - LsnForTimestamp::Future(_lsn) => "future".into(), - LsnForTimestamp::Past(_lsn) => "past".into(), - LsnForTimestamp::NoData(_lsn) => "nodata".into(), - }; - json_response(StatusCode::OK, result) + if version.unwrap_or(0) > 1 { + #[serde_as] + #[derive(serde::Serialize)] + struct Result { + #[serde_as(as = "DisplayFromStr")] + lsn: Lsn, + kind: &'static str, + } + let (lsn, kind) = match result { + LsnForTimestamp::Present(lsn) => (lsn, "present"), + LsnForTimestamp::Future(lsn) => (lsn, "future"), + LsnForTimestamp::Past(lsn) => (lsn, "past"), + LsnForTimestamp::NoData(lsn) => (lsn, "nodata"), + }; + json_response(StatusCode::OK, Result { lsn, kind }) + } else { + // FIXME: this is a temporary crutch not to break backwards compatibility + // See https://github.com/neondatabase/neon/pull/5608 + let result = match result { + LsnForTimestamp::Present(lsn) => format!("{lsn}"), + LsnForTimestamp::Future(_lsn) => "future".into(), + LsnForTimestamp::Past(_lsn) => "past".into(), + LsnForTimestamp::NoData(_lsn) => "nodata".into(), + }; + json_response(StatusCode::OK, result) + } } async fn get_timestamp_of_lsn_handler( @@ -767,6 +789,10 @@ async fn tenant_size_handler( .map_err(ApiError::InternalServerError)?; let mut sizes = None; + let accepts_html = headers + .get(header::ACCEPT) + .map(|v| v == "text/html") + .unwrap_or_default(); if !inputs_only.unwrap_or(false) { let storage_model = inputs .calculate_model() @@ -774,11 +800,11 @@ async fn tenant_size_handler( let size = storage_model.calculate(); // 
If request header expects html, return html - if headers["Accept"] == "text/html" { + if accepts_html { return synthetic_size_html_response(inputs, storage_model, size); } sizes = Some(size); - } else if headers["Accept"] == "text/html" { + } else if accepts_html { return Err(ApiError::BadRequest(anyhow!( "inputs_only parameter is incompatible with html output request" ))); @@ -929,7 +955,7 @@ fn synthetic_size_html_response( pub fn html_response(status: StatusCode, data: String) -> Result, ApiError> { let response = Response::builder() .status(status) - .header(hyper::header::CONTENT_TYPE, "text/html") + .header(header::CONTENT_TYPE, "text/html") .body(Body::from(data.as_bytes().to_vec())) .map_err(|e| ApiError::InternalServerError(e.into()))?; Ok(response) @@ -1310,7 +1336,7 @@ async fn getpage_at_lsn_handler( Result::<_, ApiError>::Ok( Response::builder() .status(StatusCode::OK) - .header(CONTENT_TYPE, "application/octet-stream") + .header(header::CONTENT_TYPE, "application/octet-stream") .body(hyper::Body::from(page)) .unwrap(), ) diff --git a/test_runner/fixtures/pageserver/http.py b/test_runner/fixtures/pageserver/http.py index 598b48b56f..d2c3715c52 100644 --- a/test_runner/fixtures/pageserver/http.py +++ b/test_runner/fixtures/pageserver/http.py @@ -441,13 +441,13 @@ class PageserverHttpClient(requests.Session): assert res_json is None def timeline_get_lsn_by_timestamp( - self, tenant_id: TenantId, timeline_id: TimelineId, timestamp + self, tenant_id: TenantId, timeline_id: TimelineId, timestamp, version: int ): log.info( f"Requesting lsn by timestamp {timestamp}, tenant {tenant_id}, timeline {timeline_id}" ) res = self.get( - f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}", + f"http://localhost:{self.port}/v1/tenant/{tenant_id}/timeline/{timeline_id}/get_lsn_by_timestamp?timestamp={timestamp}&version={version}", ) self.verbose_error(res) res_json = res.json() diff --git 
a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py index 03606e3c1c..726bfa5f29 100644 --- a/test_runner/regress/test_lsn_mapping.py +++ b/test_runner/regress/test_lsn_mapping.py @@ -8,6 +8,71 @@ from fixtures.types import Lsn from fixtures.utils import query_scalar +# +# Test pageserver get_lsn_by_timestamp API +# +def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder): + env = neon_env_builder.init_start() + + new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") + endpoint_main = env.endpoints.create_start("test_lsn_mapping") + log.info("postgres is running on 'test_lsn_mapping' branch") + + cur = endpoint_main.connect().cursor() + # Create table, and insert rows, each in a separate transaction + # Disable synchronous_commit to make this initialization go faster. + # + # Each row contains current insert LSN and the current timestamp, when + # the row was inserted. + cur.execute("SET synchronous_commit=off") + cur.execute("CREATE TABLE foo (x integer)") + tbl = [] + for i in range(1000): + cur.execute("INSERT INTO foo VALUES(%s)", (i,)) + # Get the timestamp at UTC + after_timestamp = query_scalar(cur, "SELECT clock_timestamp()").replace(tzinfo=None) + tbl.append([i, after_timestamp]) + + # Execute one more transaction with synchronous_commit enabled, to flush + # all the previous transactions + cur.execute("SET synchronous_commit=on") + cur.execute("INSERT INTO foo VALUES (-1)") + + # Wait until WAL is received by pageserver + wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id) + + with env.pageserver.http_client() as client: + # Check edge cases: timestamp in the future + probe_timestamp = tbl[-1][1] + timedelta(hours=1) + result = client.timeline_get_lsn_by_timestamp( + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1 + ) + assert result == "future" + + # timestamp too the far history + probe_timestamp = tbl[0][1] - timedelta(hours=10) + result = 
client.timeline_get_lsn_by_timestamp( + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1 + ) + assert result == "past" + + # Probe a bunch of timestamps in the valid range + for i in range(1, len(tbl), 100): + probe_timestamp = tbl[i][1] + lsn = client.timeline_get_lsn_by_timestamp( + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 1 + ) + # Call get_lsn_by_timestamp to get the LSN + # Launch a new read-only node at that LSN, and check that only the rows + # that were supposed to be committed at that point in time are visible. + endpoint_here = env.endpoints.create_start( + branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn + ) + assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i + + endpoint_here.stop_and_destroy() + + # # Test pageserver get_lsn_by_timestamp API # @@ -45,23 +110,24 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): # Check edge cases: timestamp in the future probe_timestamp = tbl[-1][1] + timedelta(hours=1) result = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z" + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) - assert result == "future" + assert result["kind"] == "future" # timestamp too the far history probe_timestamp = tbl[0][1] - timedelta(hours=10) result = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z" + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) - assert result == "past" + assert result["kind"] == "past" # Probe a bunch of timestamps in the valid range for i in range(1, len(tbl), 100): probe_timestamp = tbl[i][1] - lsn = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z" + result = client.timeline_get_lsn_by_timestamp( + env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) + 
lsn = result["lsn"] # Call get_lsn_by_timestamp to get the LSN # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible.