diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 9e8a6b02cc..09feba9b68 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -29,9 +29,33 @@ pub type BlockNumber = u32; #[derive(Debug)] pub enum LsnForTimestamp { + /// Found commits both before and after the given timestamp Present(Lsn), + + /// Found no commits after the given timestamp, this means + /// that the newest data in the branch is older than the given + /// timestamp. + /// + /// All commits <= LSN happened before the given timestamp Future(Lsn), + + /// The queried timestamp is past our horizon we look back at (PITR) + /// + /// All commits > LSN happened after the given timestamp, + /// but any commits < LSN might have happened before or after + /// the given timestamp. We don't know because no data before + /// the given lsn is available. Past(Lsn), + + /// We have found no commit with a timestamp, + /// so we can't return anything meaningful. + /// + /// The associated LSN is the lower bound value we can safely + /// create branches on, but no statement is made if it is + /// older or newer than the timestamp. + /// + /// This variant can e.g. be returned right after a + /// cluster import. NoData(Lsn), } @@ -324,7 +348,11 @@ impl Timeline { ctx: &RequestContext, ) -> Result { let gc_cutoff_lsn_guard = self.get_latest_gc_cutoff_lsn(); - let min_lsn = *gc_cutoff_lsn_guard; + // We use this method to figure out the branching LSN for the new branch, but the + // GC cutoff could be before the branching point and we cannot create a new branch + // with LSN < `ancestor_lsn`. Thus, pick the maximum of these two to be + // on the safe side. + let min_lsn = std::cmp::max(*gc_cutoff_lsn_guard, self.get_ancestor_lsn()); let max_lsn = self.get_last_record_lsn(); // LSNs are always 8-byte aligned. low/mid/high represent the @@ -354,30 +382,32 @@ impl Timeline { low = mid + 1; } } + // If `found_smaller == true`, `low` is the LSN of the last commit record + // before or at `search_timestamp` + // + // FIXME: it would be better to get the LSN of the previous commit. + // Otherwise, if you restore to the returned LSN, the database will + // include physical changes from later commits that will be marked + // as aborted, and will need to be vacuumed away. + let commit_lsn = Lsn((low - 1) * 8); match (found_smaller, found_larger) { (false, false) => { // This can happen if no commit records have been processed yet, e.g. // just after importing a cluster. - Ok(LsnForTimestamp::NoData(max_lsn)) - } - (true, false) => { - // Didn't find any commit timestamps larger than the request - Ok(LsnForTimestamp::Future(max_lsn)) + Ok(LsnForTimestamp::NoData(min_lsn)) } (false, true) => { // Didn't find any commit timestamps smaller than the request - Ok(LsnForTimestamp::Past(max_lsn)) + Ok(LsnForTimestamp::Past(min_lsn)) } - (true, true) => { - // low is the LSN of the first commit record *after* the search_timestamp, - // Back off by one to get to the point just before the commit. - // - // FIXME: it would be better to get the LSN of the previous commit. - // Otherwise, if you restore to the returned LSN, the database will - // include physical changes from later commits that will be marked - // as aborted, and will need to be vacuumed away. - Ok(LsnForTimestamp::Present(Lsn((low - 1) * 8))) + (true, false) => { + // Only found commits with timestamps smaller than the request. + // It's still a valid case for branch creation, return it. + // And `update_gc_info()` ignores LSN for a `LsnForTimestamp::Future` + // case, anyway. + Ok(LsnForTimestamp::Future(commit_lsn)) } + (true, true) => Ok(LsnForTimestamp::Present(commit_lsn)), } } diff --git a/test_runner/regress/test_lsn_mapping.py b/test_runner/regress/test_lsn_mapping.py index 726bfa5f29..f79c1c347c 100644 --- a/test_runner/regress/test_lsn_mapping.py +++ b/test_runner/regress/test_lsn_mapping.py @@ -79,13 +79,32 @@ def test_lsn_mapping_old(neon_env_builder: NeonEnvBuilder): def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): env = neon_env_builder.init_start() - new_timeline_id = env.neon_cli.create_branch("test_lsn_mapping") - endpoint_main = env.endpoints.create_start("test_lsn_mapping") - log.info("postgres is running on 'test_lsn_mapping' branch") + tenant_id, _ = env.neon_cli.create_tenant( + conf={ + # disable default GC and compaction + "gc_period": "1000 m", + "compaction_period": "0 s", + "gc_horizon": f"{1024 ** 2}", + "checkpoint_distance": f"{1024 ** 2}", + "compaction_target_size": f"{1024 ** 2}", + } + ) + + timeline_id = env.neon_cli.create_branch("test_lsn_mapping", tenant_id=tenant_id) + endpoint_main = env.endpoints.create_start("test_lsn_mapping", tenant_id=tenant_id) + timeline_id = endpoint_main.safe_psql("show neon.timeline_id")[0][0] + log.info("postgres is running on 'main' branch") cur = endpoint_main.connect().cursor() + + # Obtain an lsn before all write operations on this branch + start_lsn = Lsn(query_scalar(cur, "SELECT pg_current_wal_lsn()")) + # Create table, and insert rows, each in a separate transaction # Disable synchronous_commit to make this initialization go faster. + # Disable `synchronous_commit` to make this initialization go faster. + # XXX: on my laptop this test takes 7s, and setting `synchronous_commit=off` + # doesn't change anything. # # Each row contains current insert LSN and the current timestamp, when # the row was inserted. @@ -104,40 +123,63 @@ def test_lsn_mapping(neon_env_builder: NeonEnvBuilder): cur.execute("INSERT INTO foo VALUES (-1)") # Wait until WAL is received by pageserver - wait_for_last_flush_lsn(env, endpoint_main, env.initial_tenant, new_timeline_id) + last_flush_lsn = wait_for_last_flush_lsn(env, endpoint_main, tenant_id, timeline_id) with env.pageserver.http_client() as client: - # Check edge cases: timestamp in the future + # Check edge cases + # Timestamp is in the future probe_timestamp = tbl[-1][1] + timedelta(hours=1) result = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 + tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) assert result["kind"] == "future" + # make sure that we return a well advanced lsn here + assert Lsn(result["lsn"]) > start_lsn - # timestamp too the far history + # Timestamp is in the unreachable past probe_timestamp = tbl[0][1] - timedelta(hours=10) result = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 + tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) assert result["kind"] == "past" + # make sure that we return the minimum lsn here at the start of the range + assert Lsn(result["lsn"]) < start_lsn # Probe a bunch of timestamps in the valid range for i in range(1, len(tbl), 100): probe_timestamp = tbl[i][1] result = client.timeline_get_lsn_by_timestamp( - env.initial_tenant, new_timeline_id, f"{probe_timestamp.isoformat()}Z", 2 + tenant_id, timeline_id, f"{probe_timestamp.isoformat()}Z", 2 ) + assert result["kind"] not in ["past", "nodata"] lsn = result["lsn"] # Call get_lsn_by_timestamp to get the LSN # Launch a new read-only node at that LSN, and check that only the rows # that were supposed to be committed at that point in time are visible. endpoint_here = env.endpoints.create_start( - branch_name="test_lsn_mapping", endpoint_id="ep-lsn_mapping_read", lsn=lsn + branch_name="test_lsn_mapping", + endpoint_id="ep-lsn_mapping_read", + lsn=lsn, + tenant_id=tenant_id, ) assert endpoint_here.safe_psql("SELECT max(x) FROM foo")[0][0] == i endpoint_here.stop_and_destroy() + # Do the "past" check again at a new branch to ensure that we don't return something before the branch cutoff + timeline_id_child = env.neon_cli.create_branch( + "test_lsn_mapping_child", tenant_id=tenant_id, ancestor_branch_name="test_lsn_mapping" + ) + + # Timestamp is in the unreachable past + probe_timestamp = tbl[0][1] - timedelta(hours=10) + result = client.timeline_get_lsn_by_timestamp( + tenant_id, timeline_id_child, f"{probe_timestamp.isoformat()}Z", 2 + ) + assert result["kind"] == "past" + # make sure that we return the minimum lsn here at the start of the range + assert Lsn(result["lsn"]) >= last_flush_lsn + # Test pageserver get_timestamp_of_lsn API def test_ts_of_lsn_api(neon_env_builder: NeonEnvBuilder):