mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-11 07:22:55 +00:00
feat(pageserver): allow read path debug in getpagelsn API (#10748)
## Problem The usual workflow for me to debug read path errors in staging is: download the tenant to my laptop, import, and then run some read tests. With this patch, we can do this directly over staging pageservers. ## Summary of changes * Add a new `touchpagelsn` API that does a page read but does not return page info back. * Allow read from latest record LSN from get/touchpagelsn * Add read_debug config in the context. * The read path will read the context config to decide whether to enable read path tracing or not. Signed-off-by: Alex Chi Z <chi@neon.tech>
This commit is contained in:
@@ -98,6 +98,7 @@ pub struct RequestContext {
|
||||
download_behavior: DownloadBehavior,
|
||||
access_stats_behavior: AccessStatsBehavior,
|
||||
page_content_kind: PageContentKind,
|
||||
read_path_debug: bool,
|
||||
}
|
||||
|
||||
/// The kind of access to the page cache.
|
||||
@@ -155,6 +156,7 @@ impl RequestContextBuilder {
|
||||
download_behavior: DownloadBehavior::Download,
|
||||
access_stats_behavior: AccessStatsBehavior::Update,
|
||||
page_content_kind: PageContentKind::Unknown,
|
||||
read_path_debug: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -168,6 +170,7 @@ impl RequestContextBuilder {
|
||||
download_behavior: original.download_behavior,
|
||||
access_stats_behavior: original.access_stats_behavior,
|
||||
page_content_kind: original.page_content_kind,
|
||||
read_path_debug: original.read_path_debug,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -191,6 +194,11 @@ impl RequestContextBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn read_path_debug(mut self, b: bool) -> Self {
|
||||
self.inner.read_path_debug = b;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(self) -> RequestContext {
|
||||
self.inner
|
||||
}
|
||||
@@ -291,4 +299,8 @@ impl RequestContext {
|
||||
pub(crate) fn page_content_kind(&self) -> PageContentKind {
|
||||
self.page_content_kind
|
||||
}
|
||||
|
||||
pub(crate) fn read_path_debug(&self) -> bool {
|
||||
self.read_path_debug
|
||||
}
|
||||
}
|
||||
|
||||
@@ -68,6 +68,7 @@ use tokio_util::sync::CancellationToken;
|
||||
use tracing::*;
|
||||
|
||||
use crate::config::PageServerConf;
|
||||
use crate::context::RequestContextBuilder;
|
||||
use crate::context::{DownloadBehavior, RequestContext};
|
||||
use crate::deletion_queue::DeletionQueueClient;
|
||||
use crate::pgdatadir_mapping::LsnForTimestamp;
|
||||
@@ -2571,14 +2572,30 @@ async fn deletion_queue_flush(
|
||||
}
|
||||
}
|
||||
|
||||
/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
|
||||
async fn getpage_at_lsn_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
getpage_at_lsn_handler_inner(false, request, cancel).await
|
||||
}
|
||||
|
||||
async fn touchpage_at_lsn_handler(
|
||||
request: Request<Body>,
|
||||
cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
getpage_at_lsn_handler_inner(true, request, cancel).await
|
||||
}
|
||||
|
||||
/// Try if `GetPage@Lsn` is successful, useful for manual debugging.
|
||||
async fn getpage_at_lsn_handler_inner(
|
||||
touch: bool,
|
||||
request: Request<Body>,
|
||||
_cancel: CancellationToken,
|
||||
) -> Result<Response<Body>, ApiError> {
|
||||
let tenant_shard_id: TenantShardId = parse_request_param(&request, "tenant_shard_id")?;
|
||||
let timeline_id: TimelineId = parse_request_param(&request, "timeline_id")?;
|
||||
check_permission(&request, Some(tenant_shard_id.tenant_id))?;
|
||||
// Require pageserver admin permission for this API instead of only tenant-level token.
|
||||
check_permission(&request, None)?;
|
||||
let state = get_state(&request);
|
||||
|
||||
struct Key(pageserver_api::key::Key);
|
||||
@@ -2593,22 +2610,29 @@ async fn getpage_at_lsn_handler(
|
||||
|
||||
let key: Key = parse_query_param(&request, "key")?
|
||||
.ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'key' query parameter")))?;
|
||||
let lsn: Lsn = parse_query_param(&request, "lsn")?
|
||||
.ok_or_else(|| ApiError::BadRequest(anyhow!("missing 'lsn' query parameter")))?;
|
||||
let lsn: Option<Lsn> = parse_query_param(&request, "lsn")?;
|
||||
|
||||
async {
|
||||
let ctx = RequestContext::new(TaskKind::MgmtRequest, DownloadBehavior::Download);
|
||||
// Enable read path debugging
|
||||
let ctx = RequestContextBuilder::extend(&ctx).read_path_debug(true).build();
|
||||
let timeline = active_timeline_of_active_tenant(&state.tenant_manager, tenant_shard_id, timeline_id).await?;
|
||||
|
||||
// Use last_record_lsn if no lsn is provided
|
||||
let lsn = lsn.unwrap_or_else(|| timeline.get_last_record_lsn());
|
||||
let page = timeline.get(key.0, lsn, &ctx).await?;
|
||||
|
||||
Result::<_, ApiError>::Ok(
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "application/octet-stream")
|
||||
.body(hyper::Body::from(page))
|
||||
.unwrap(),
|
||||
)
|
||||
if touch {
|
||||
json_response(StatusCode::OK, ())
|
||||
} else {
|
||||
Result::<_, ApiError>::Ok(
|
||||
Response::builder()
|
||||
.status(StatusCode::OK)
|
||||
.header(header::CONTENT_TYPE, "application/octet-stream")
|
||||
.body(hyper::Body::from(page))
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
}
|
||||
.instrument(info_span!("timeline_get", tenant_id = %tenant_shard_id.tenant_id, shard_id = %tenant_shard_id.shard_slug(), %timeline_id))
|
||||
.await
|
||||
@@ -3743,6 +3767,10 @@ pub fn make_router(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/getpage",
|
||||
|r| testing_api_handler("getpage@lsn", r, getpage_at_lsn_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/touchpage",
|
||||
|r| api_handler(r, touchpage_at_lsn_handler),
|
||||
)
|
||||
.get(
|
||||
"/v1/tenant/:tenant_shard_id/timeline/:timeline_id/keyspace",
|
||||
|r| api_handler(r, timeline_collect_keyspace),
|
||||
|
||||
@@ -1298,7 +1298,7 @@ impl Timeline {
|
||||
reconstruct_state: &mut ValuesReconstructState,
|
||||
ctx: &RequestContext,
|
||||
) -> Result<BTreeMap<Key, Result<Bytes, PageReconstructError>>, GetVectoredError> {
|
||||
let read_path = if self.conf.enable_read_path_debugging {
|
||||
let read_path = if self.conf.enable_read_path_debugging || ctx.read_path_debug() {
|
||||
Some(ReadPath::new(keyspace.clone(), lsn))
|
||||
} else {
|
||||
None
|
||||
|
||||
Reference in New Issue
Block a user