mirror of
https://github.com/neondatabase/neon.git
synced 2026-06-01 12:30:38 +00:00
find_lsn_for_timestamp fixes (#5844)
Includes the changes of #3689 that address point 1 of #3689, plus some further improvements. In particular, this PR does: * set `min_lsn` to a safe value to create branches from (and verify it in tests) * return `min_lsn` instead of `max_lsn` for `NoData` and `Past` (verify it in test for `Past`, `NoData` is harder and not as important) * return `commit_lsn` instead of `max_lsn` for Future (and verify it in the tests) * add some comments Split out of #5686 to get something more minimal out to users.
This commit is contained in:
@@ -29,9 +29,33 @@ pub type BlockNumber = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum LsnForTimestamp {
|
||||
/// Found commits both before and after the given timestamp
|
||||
Present(Lsn),
|
||||
|
||||
/// Found no commits after the given timestamp, this means
|
||||
/// that the newest data in the branch is older than the given
|
||||
/// timestamp.
|
||||
///
|
||||
/// All commits <= LSN happened before the given timestamp
|
||||
Future(Lsn),
|
||||
|
||||
/// The queried timestamp is past our horizon we look back at (PITR)
|
||||
///
|
||||
/// All commits > LSN happened after the given timestamp,
|
||||
/// but any commits < LSN might have happened before or after
|
||||
/// the given timestamp. We don't know because no data before
|
||||
/// the given lsn is available.
|
||||
Past(Lsn),
|
||||
|
||||
/// We have found no commit with a timestamp,
|
||||
/// so we can't return anything meaningful.
|
||||
///
|
||||
/// The associated LSN is the lower bound value we can safely
|
||||
/// create branches on, but no statement is made if it is
|
||||
/// older or newer than the timestamp.
|
||||
///
|
||||
/// This variant can e.g. be returned right after a
|
||||
/// cluster import.
|
||||
NoData(Lsn),
|
||||
}
|
||||
|
||||
@@ -324,7 +348,11 @@ impl Timeline {
|
||||
ctx: &RequestContext,
|
||||
) -> Result<LsnForTimestamp, PageReconstructError> {
|
||||
let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn();
|
||||
let min_lsn = *gc_cutoff_lsn_guard;
|
||||
// We use this method to figure out the branching LSN for the new branch, but the
|
||||
// GC cutoff could be before the branching point and we cannot create a new branch
|
||||
// with LSN < `ancestor_lsn`. Thus, pick the maximum of these two to be
|
||||
// on the safe side.
|
||||
let min_lsn = std::cmp::max(*gc_cutoff_lsn_guard, self.get_ancestor_lsn());
|
||||
let max_lsn = self.get_last_record_lsn();
|
||||
|
||||
// LSNs are always 8-byte aligned. low/mid/high represent the
|
||||
@@ -354,30 +382,32 @@ impl Timeline {
|
||||
low = mid + 1;
|
||||
}
|
||||
}
|
||||
// If `found_smaller == true`, `low` is the LSN of the last commit record
|
||||
// before or at `search_timestamp`
|
||||
//
|
||||
// FIXME: it would be better to get the LSN of the previous commit.
|
||||
// Otherwise, if you restore to the returned LSN, the database will
|
||||
// include physical changes from later commits that will be marked
|
||||
// as aborted, and will need to be vacuumed away.
|
||||
let commit_lsn = Lsn((low - 1) * 8);
|
||||
match (found_smaller, found_larger) {
|
||||
(false, false) => {
|
||||
// This can happen if no commit records have been processed yet, e.g.
|
||||
// just after importing a cluster.
|
||||
Ok(LsnForTimestamp::NoData(max_lsn))
|
||||
}
|
||||
(true, false) => {
|
||||
// Didn't find any commit timestamps larger than the request
|
||||
Ok(LsnForTimestamp::Future(max_lsn))
|
||||
Ok(LsnForTimestamp::NoData(min_lsn))
|
||||
}
|
||||
(false, true) => {
|
||||
// Didn't find any commit timestamps smaller than the request
|
||||
Ok(LsnForTimestamp::Past(max_lsn))
|
||||
Ok(LsnForTimestamp::Past(min_lsn))
|
||||
}
|
||||
(true, true) => {
|
||||
// low is the LSN of the first commit record *after* the search_timestamp,
|
||||
// Back off by one to get to the point just before the commit.
|
||||
//
|
||||
// FIXME: it would be better to get the LSN of the previous commit.
|
||||
// Otherwise, if you restore to the returned LSN, the database will
|
||||
// include physical changes from later commits that will be marked
|
||||
// as aborted, and will need to be vacuumed away.
|
||||
Ok(LsnForTimestamp::Present(Lsn((low - 1) * 8)))
|
||||
(true, false) => {
|
||||
// Only found commits with timestamps smaller than the request.
|
||||
// It's still a valid case for branch creation, return it.
|
||||
// And `update_gc_info()` ignores LSN for a `LsnForTimestamp::Future`
|
||||
// case, anyway.
|
||||
Ok(LsnForTimestamp::Future(commit_lsn))
|
||||
}
|
||||
(true, true) => Ok(LsnForTimestamp::Present(commit_lsn)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -79,13 +79,32 @@ def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder):
|
||||
def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
env = neon_env_builder.init_start()
|
||||
|
||||
new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping")
|
||||
endpoint_main = env.endpoints.create_start("test_lsn_mapping")
|
||||
log.info("postgres is running on 'test_lsn_mapping' branch")
|
||||
tenant_id, _ = env.neon_cli.create_tenant(
|
||||
conf={
|
||||
# disable default GC and compaction
|
||||
"gc_period": "1000 m",
|
||||
"compaction_period": "0 s",
|
||||
"gc_horizon": f"{1024 ** 2}",
|
||||
"checkpoint_distance": f"{1024 ** 2}",
|
||||
"compaction_target_size": f"{1024 ** 2}",
|
||||
}
|
||||
)
|
||||
|
||||
timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id)
|
||||
endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id)
|
||||
timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0]
|
||||
log.info("postgres is running on 'main' branch")
|
||||
|
||||
cur = endpoint_main.connect().cursor()
|
||||
|
||||
# Obtain an lsn before all write operations on this branch
|
||||
start_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_lsn()"))
|
||||
|
||||
# Create table, and insert rows, each in a separate transaction
|
||||
# Disable synchronous_commit to make this initialization go faster.
|
||||
# Disable `synchronous_commit` to make this initialization go faster.
|
||||
# XXX: on my laptop this test takes 7s, and setting `synchronous_commit=off`
|
||||
# doesn't change anything.
|
||||
#
|
||||
# Each row contains current insert LSN and the current timestamp, when
|
||||
# the row was inserted.
|
||||
@@ -104,40 +123,63 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder):
|
||||
cur.execute("INSERT INTO foo VALUES (-1)")
|
||||
|
||||
# Wait until WAL is received by pageserver
|
||||
wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id)
|
||||
last_flush_lsn = wait_for_last_flush_lsn(env, endpoint_main, tenant_id, timeline_id)
|
||||
|
||||
with env.pageserver.http_client() as client:
|
||||
# Check edge cases: timestamp in the future
|
||||
# Check edge cases
|
||||
# Timestamp is in the future
|
||||
probe_timestamp = tbl[-1][1] + timedelta(hours=1)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
)
|
||||
assert result["kind"] == "future"
|
||||
# make sure that we return a well advanced lsn here
|
||||
assert Lsn(result["lsn"]) > start_lsn
|
||||
|
||||
# timestamp too the far history
|
||||
# Timestamp is in the unreachable past
|
||||
probe_timestamp = tbl[0][1] - timedelta(hours=10)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
)
|
||||
assert result["kind"] == "past"
|
||||
# make sure that we return the minimum lsn here at the start of the range
|
||||
assert Lsn(result["lsn"]) < start_lsn
|
||||
|
||||
# Probe a bunch of timestamps in the valid range
|
||||
for i in range(1, len(tbl), 100):
|
||||
probe_timestamp = tbl[i][1]
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2
|
||||
)
|
||||
assert result["kind"] not in ["past", "nodata"]
|
||||
lsn = result["lsn"]
|
||||
# Call get_lsn_by_timestamp to get the LSN
|
||||
# Launch a new read-only node at that LSN, and check that only the rows
|
||||
# that were supposed to be committed at that point in time are visible.
|
||||
endpoint_here = env.endpoints.create_start(
|
||||
branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn
|
||||
branch_name="test_lsn_mapping",
|
||||
endpoint_id="ep-lsn_mapping_read",
|
||||
lsn=lsn,
|
||||
tenant_id=tenant_id,
|
||||
)
|
||||
assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i
|
||||
|
||||
endpoint_here.stop_and_destroy()
|
||||
|
||||
# Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff
|
||||
timeline_id_child = env.neon_cli.create_branch(
|
||||
"test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping"
|
||||
)
|
||||
|
||||
# Timestamp is in the unreachable past
|
||||
probe_timestamp = tbl[0][1] - timedelta(hours=10)
|
||||
result = client.timeline_get_lsn_by_timestamp(
|
||||
tenant_id, timeline_id_child, f"{probe_timestamp.isoformat()}Z", 2
|
||||
)
|
||||
assert result["kind"] == "past"
|
||||
# make sure that we return the minimum lsn here at the start of the range
|
||||
assert Lsn(result["lsn"]) >= last_flush_lsn
|
||||
|
||||
|
||||
# Test pageserver get_timestamp_of_lsn API
|
||||
def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):
|
||||
|
||||
Reference in New Issue
Block a user