Choose max(branching_lsn, after_pg_upgrade_lsn) when importing new timeline data.

This fixes the "xlog flush request %X/%X is not satisfied" error.
See the comments in branch_timeline_impl().
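In outline, the fix amounts to the following. This is a minimal sketch, not the commit's literal code: Lsn here is a stand-in newtype for the pageserver's utils::lsn::Lsn, and align() is assumed to round up to the 8-byte WAL record alignment.

// Minimal sketch of the fix; Lsn is a stand-in for utils::lsn::Lsn.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Lsn(u64);

impl Lsn {
    // Assumed semantics: round up to the 8-byte alignment of WAL records.
    fn align(self) -> Lsn {
        Lsn((self.0 + 7) & !7)
    }
}

// Previously the import LSN was derived only from the branch point, so pages
// rewritten by pg_upgrade could carry LSNs beyond it. Taking whichever LSN
// is further ahead avoids that.
fn choose_import_lsn(branching_lsn: Lsn, after_pg_upgrade_lsn: Lsn) -> Lsn {
    std::cmp::max(branching_lsn.align(), after_pg_upgrade_lsn.align())
}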
Anastasia Lubennikova
2024-09-13 04:24:46 +01:00
parent 168a6a87d7
commit 33ad0dc177
2 changed files with 74 additions and 31 deletions


@@ -55,6 +55,7 @@ pub async fn import_timeline_from_postgres_datadir(
pgdata_path: &Utf8Path,
pgdata_lsn: Lsn,
change_control_file_lsn: bool,
src_timeline: Option<&Timeline>,
ctx: &RequestContext,
) -> Result<()> {
let mut pg_control: Option<ControlFileData> = None;
@@ -101,6 +102,37 @@ pub async fn import_timeline_from_postgres_datadir(
}
}
// // if we're importing after pg_upgrade
// // also copy metadata for all relations that were not copied
// // from the parent timeline
// if let Some(src_timeline) = src_timeline {
// for ((spcnode, dbnode), _) in src_timeline
// .list_dbdirs(pgdata_lsn, ctx)
// .await
// .with_context(|| format!("Failed to list_dbdirs for src_timeline"))?
// {
// let rels = src_timeline
// .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to list_rels for src_timeline"))?;
// let new_rels = tline
// .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to list_rels for new_timeline"))?;
// for rel in rels {
// if !new_rels.contains(&rel) {
// let nblocks = src_timeline
// .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to get_rel_size for src_timeline"))?;
// // TODO insert relation size into the new timeline's cache
// }
// }
// }
// }
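// // (one possible completion of this block is sketched after this file's diff)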
// We're done importing all the data files.
modification.commit(ctx).await?;
@@ -136,25 +168,27 @@ pub async fn import_timeline_from_postgres_datadir(
Ok(())
}
fn is_user_relfile(path: &Path) -> bool {
let filename = &path
.file_name()
.expect("missing rel filename")
.to_string_lossy();
let (relnode, _, _) = parse_relfilename(filename)
.map_err(|e| {
warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
e
})
.unwrap();
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
// THIS IS WRONG
// if catalog relation was vacuumed with vacuum full, it will have a new relfilenode
// which will be greater than FirstNormalObjectId
// Use pg_relfilemap to decide if the relation is a catalog relation
if relnode > pg_constants::FIRST_NORMAL_OBJECT_ID {
return true;
}
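The relmapper route hinted at above could look roughly like this. A hedged sketch, not part of the commit: it assumes PostgreSQL's on-disk RelMapFile layout from relmapper.c (int32 magic 0x592717, int32 num_mappings, then (oid, filenode) pairs in native byte order), and it only identifies mapped catalogs -- anything else would still need a pg_class lookup.

use std::collections::HashSet;
use std::path::Path;

// Sketch: collect relfilenodes of *mapped* catalog relations from a
// database directory's pg_filenode.map. A relfilenode found here is a
// catalog relation even if VACUUM FULL pushed it past FirstNormalObjectId.
fn mapped_catalog_filenodes(dbdir: &Path) -> std::io::Result<HashSet<u32>> {
    const RELMAPPER_FILEMAGIC: i32 = 0x592717;
    let buf = std::fs::read(dbdir.join("pg_filenode.map"))?;
    // Helper to read a native-endian int32 at a byte offset.
    let int32 = |off: usize| i32::from_ne_bytes(buf[off..off + 4].try_into().unwrap());
    assert_eq!(int32(0), RELMAPPER_FILEMAGIC, "unexpected relmapper magic");
    let num_mappings = int32(4) as usize;
    let mut filenodes = HashSet::with_capacity(num_mappings);
    for i in 0..num_mappings {
        // Each mapping is (Oid mapoid, Oid mapfilenode), 4 bytes apiece;
        // we only need the filenode half.
        filenodes.insert(int32(8 + i * 8 + 4) as u32);
    }
    Ok(filenodes)
}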
@@ -182,7 +216,6 @@ async fn import_rel(
e
})?;
let mut buf: [u8; 8192] = [0u8; 8192];
ensure!(len % BLCKSZ as usize == 0);
@@ -606,14 +639,14 @@ async fn import_file(
_ => {
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
// TODO Implement import_rel_from_old_version that will copy
// relation metadata and cached size from the parent timeline
if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
info!("after pg_restore skipping {:?}", file_path);
} else {
import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
debug!("imported rel creation");
}
}
}
} else if file_path.starts_with("base") {
@@ -637,10 +670,9 @@ async fn import_file(
debug!("ignored PG_VERSION file");
}
_ => {
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
info!("after pg_restore skipping {:?}", file_path);
} else {
import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;

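As an aside, the commented-out block in this file could plausibly be finished along these lines. A hedged sketch: it reuses the Timeline methods the comment already calls (list_dbdirs, list_rels, get_rel_size) and the file's own types (Timeline, Version, RequestContext, Lsn); update_cached_rel_size is a hypothetical stand-in for the "TODO insert relation size into the new timeline's cache".

// Hedged sketch, not the commit's code: seed the new timeline's relation
// size cache for parent relations the import skipped.
async fn carry_over_skipped_rel_sizes(
    src_timeline: &Timeline,
    tline: &Timeline,
    pgdata_lsn: Lsn,
    ctx: &RequestContext,
) -> anyhow::Result<()> {
    for ((spcnode, dbnode), _) in src_timeline.list_dbdirs(pgdata_lsn, ctx).await? {
        let old_rels = src_timeline
            .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
            .await?;
        let new_rels = tline
            .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
            .await?;
        for rel in old_rels.iter().copied().filter(|r| !new_rels.contains(r)) {
            let nblocks = src_timeline
                .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
                .await?;
            // Hypothetical helper: remember the size so the first access
            // on the new timeline doesn't have to fault it in.
            tline.update_cached_rel_size(rel, pgdata_lsn, nblocks);
        }
    }
    Ok(())
}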

@@ -3393,6 +3393,14 @@ impl Tenant {
)
})?;
// TODO
// do the pg_upgrade bits here.
// Rust is not the most convenient language for writing this,
// so we just call pg_upgrade in a subprocess.
// In the future we can turn this into an API call to a service that does the work.
//
// 1. start postgres on the parent timeline at the start_lsn, using neon_local (for now this is hardcoded)
// 2. run pg_upgrade using neon_local for the old version and the freshly created pgdata for the new version
run_pg_upgrade(
self.conf,
&pgdata_path,
@@ -3407,25 +3415,26 @@ impl Tenant {
"Failed to pg_upgrade {timeline_id} with pg_version {pg_version} at {pgdata_path}"
)
})?;
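For context, run_pg_upgrade can be as thin as a subprocess wrapper around the pg_upgrade binary. A hedged sketch with a made-up name and signature; only the pg_upgrade flags themselves (-b/-B for the old/new bindirs, -d/-D for the old/new data directories) are the standard CLI:

use std::path::Path;
use std::process::Command;

// Sketch only: spawn pg_upgrade and fail loudly on a nonzero exit.
fn run_pg_upgrade_sketch(
    old_bindir: &Path,
    new_bindir: &Path,
    old_datadir: &Path, // the compute started on the parent timeline (step 1)
    new_datadir: &Path, // the freshly created pgdata for the new version
) -> anyhow::Result<()> {
    let status = Command::new(new_bindir.join("pg_upgrade"))
        .arg("-b").arg(old_bindir)
        .arg("-B").arg(new_bindir)
        .arg("-d").arg(old_datadir)
        .arg("-D").arg(new_datadir)
        // run from the new pgdata's parent so pg_upgrade's output
        // directory lands somewhere predictable
        .current_dir(new_datadir.parent().unwrap())
        .status()?;
    anyhow::ensure!(status.success(), "pg_upgrade failed: {status}");
    Ok(())
}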
let controlfile_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
let start_lsn = start_lsn.align();
// Choose the max of controlfile_lsn and start_lsn.
//
// It is possible that the controlfile_lsn is ahead of the start_lsn,
// especially for small databases.
// In that case, we need to start from the controlfile_lsn;
// otherwise we will have pages with LSNs larger than the LSN of the branch,
// and compute will fail when it tries to flush a page
// with an LSN larger than the branch LSN:
//
// ERROR: xlog flush request %X/%X is not satisfied --- flushed only to %X/%X
//
// There is another problem here: a gap between the
// branching_lsn (where we diverged from the parent) and pgdata_lsn (the import LSN of the new timeline).
// We should teach wal-redo to skip all the records between these two points;
// otherwise we will see some updates from the parent timeline in the new timeline.
let pgdata_lsn = std::cmp::max(controlfile_lsn, start_lsn);
assert!(pgdata_lsn.is_aligned());
// TODO
// We must have start_lsn+1 == pgdata_lsn
// Set it somehow
// TODO why do we need these lines?
let tenant_shard_id = uninitialized_timeline.owning_tenant.tenant_shard_id;
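To make the comment above concrete, here is the failure mode with made-up numbers:

// Made-up numbers illustrating the failure mode described above.
fn main() {
    let start_lsn: u64 = 0x0169_F3E8; // branch point, already aligned
    let controlfile_lsn: u64 = 0x016A_0010; // checkpoint written by pg_upgrade
    // A page touched during pg_upgrade is stamped with an LSN near
    // controlfile_lsn. If the timeline started at start_lsn, flushing that
    // page would request "flush up to 0x16A0010" while the branch has WAL
    // only up to 0x169F3E8 -- the "xlog flush request is not satisfied" error.
    let pgdata_lsn = start_lsn.max(controlfile_lsn);
    assert_eq!(pgdata_lsn, controlfile_lsn); // small databases often hit this
}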
@@ -3442,6 +3451,7 @@ impl Tenant {
&pgdata_path,
pgdata_lsn,
true,
Some(src_timeline),
ctx,
)
.await
@@ -3667,6 +3677,7 @@ impl Tenant {
&pgdata_path,
pgdata_lsn,
false,
None,
ctx,
)
.await