diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs
index 534f68f5ef..6229593464 100644
--- a/pageserver/src/import_datadir.rs
+++ b/pageserver/src/import_datadir.rs
@@ -55,6 +55,7 @@ pub async fn import_timeline_from_postgres_datadir(
     pgdata_path: &Utf8Path,
     pgdata_lsn: Lsn,
     change_control_file_lsn: bool,
+    src_timeline: Option<&Timeline>,
     ctx: &RequestContext,
 ) -> Result<()> {
     let mut pg_control: Option<ControlFileData> = None;
@@ -101,6 +102,37 @@ pub async fn import_timeline_from_postgres_datadir(
         }
     }
 
+    // // if we're importing after pg_upgrade,
+    // // also copy metadata for all relations that were not copied
+    // // from the parent timeline
+    // if let Some(src_timeline) = src_timeline {
+    //     for ((spcnode, dbnode), _) in src_timeline
+    //         .list_dbdirs(pgdata_lsn, ctx)
+    //         .await
+    //         .context("Failed to list_dbdirs for src_timeline")?
+    //     {
+    //         let rels = src_timeline
+    //             .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
+    //             .await
+    //             .context("Failed to list_rels for src_timeline")?;
+
+    //         let new_rels = tline
+    //             .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
+    //             .await
+    //             .context("Failed to list_rels for new_timeline")?;
+
+    //         for rel in rels {
+    //             if !new_rels.contains(&rel) {
+    //                 let nblocks = src_timeline
+    //                     .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
+    //                     .await
+    //                     .context("Failed to get_rel_size for src_timeline")?;
+    //                 // TODO insert relation size into the new timeline's cache
+    //             }
+    //         }
+    //     }
+    // }
+
     // We're done importing all the data files.
     modification.commit(ctx).await?;
@@ -136,25 +168,27 @@ pub async fn import_timeline_from_postgres_datadir(
     Ok(())
 }
-
 fn is_user_relfile(path: &Path) -> bool {
     let filename = &path
         .file_name()
         .expect("missing rel filename")
         .to_string_lossy();
-    let (relnode, _, _) = parse_relfilename(filename).map_err(|e| {
-        warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
-        e
-    }).unwrap();
+    let (relnode, _, _) = parse_relfilename(filename)
+        .map_err(|e| {
+            warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
+            e
+        })
+        .unwrap();
 
     // if this is import after pg_upgrade, skip all user data files
     // relfilenode > FirstNormalObjectId of the new cluster
-    // THIS IS BAD
+    // THIS IS WRONG
     // if catalog relation was vacuumed with vacuum full, it will have a new relfilenode
     // which will be greater than FirstNormalObjectId
+    // Use pg_relfilemap to decide whether the relation is a catalog relation
     if relnode > pg_constants::FIRST_NORMAL_OBJECT_ID {
-        //
+        //
         return true;
     }
@@ -182,7 +216,6 @@ async fn import_rel(
         e
     })?;
 
-
     let mut buf: [u8; 8192] = [0u8; 8192];
 
     ensure!(len % BLCKSZ as usize == 0);
@@ -606,14 +639,14 @@ async fn import_file(
         _ => {
             // if this is import after pg_upgrade, skip all user data files
             // relfilenode > FirstNormalObjectId of the new cluster
-            if is_user_relfile(file_path) && new_checkpoint_lsn.is_some()
-            {
+            // TODO Implement import_rel_from_old_version that will copy
+            // relation metadata and the cached size from the parent timeline
+            if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
                 info!("after pg_restore skipping {:?}", file_path);
             } else {
                 import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
                 debug!("imported rel creation");
             }
-
         }
     }
 } else if file_path.starts_with("base") {
@@ -637,10 +670,9 @@ async fn import_file(
             debug!("ignored PG_VERSION file");
         }
         _ => {
-            // if this is import after pg_upgrade, skip all user data files
+            // if this is import after pg_upgrade, skip all user data files
             // relfilenode > FirstNormalObjectId of the new cluster
-            if is_user_relfile(file_path) && new_checkpoint_lsn.is_some()
-            {
+            if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
                 info!("after pg_restore skipping {:?}", file_path);
             } else {
                 import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
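The commented-out block in the second hunk above stops at `// TODO insert relation size into the new timeline's cache`. A minimal sketch of what the loop body could become, assuming `DatadirModification::put_rel_creation(rel, nblocks, ctx)` from pgdatadir_mapping.rs is the right call for recording a relation's existence and size without importing its pages (treat that call and its semantics as an assumption, not the settled API):

```rust
// Hypothetical completion of the TODO: register every relation that exists on
// the parent timeline but was not re-imported (user relations keep their data
// across pg_upgrade), carrying its block count over to the new timeline.
for rel in rels {
    if !new_rels.contains(&rel) {
        let nblocks = src_timeline
            .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
            .await
            .context("Failed to get_rel_size for src_timeline")?;
        // Assumed API: writes the relation's metadata (including its size in
        // blocks) into the new timeline; the data pages themselves would still
        // resolve through the ancestor timeline.
        modification.put_rel_creation(rel, nblocks, ctx).await?;
    }
}
```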
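On the `THIS IS WRONG` note in `is_user_relfile`: a catalog relation rewritten by `VACUUM FULL` gets a fresh relfilenode above `FIRST_NORMAL_OBJECT_ID`, so the plain threshold check misclassifies it as a user relation. For mapped catalogs the authoritative relfilenodes live in each database's `pg_filenode.map`. Below is a sketch of reading them, with the layout taken from PostgreSQL's relmapper.c (magic `0x592717`, then `(oid, filenode)` pairs); little-endian byte order and the helper name are assumptions, and non-mapped catalogs would still need a pg_class lookup, which is not shown:

```rust
use std::collections::HashSet;
use std::path::Path;

/// Collect the relfilenodes of mapped catalog relations from pg_filenode.map.
/// Layout per relmapper.c: i32 magic, i32 num_mappings, then pairs of
/// (Oid oid, Oid filenode), followed by a CRC that we do not verify here.
fn mapped_catalog_filenodes(dbdir: &Path) -> anyhow::Result<HashSet<u32>> {
    let buf = std::fs::read(dbdir.join("pg_filenode.map"))?;
    anyhow::ensure!(buf.len() >= 8, "truncated pg_filenode.map");
    let magic = u32::from_le_bytes(buf[0..4].try_into()?);
    anyhow::ensure!(magic == 0x0059_2717, "bad pg_filenode.map magic: {magic:#x}");
    let num_mappings = u32::from_le_bytes(buf[4..8].try_into()?) as usize;
    anyhow::ensure!(buf.len() >= 8 + num_mappings * 8, "truncated pg_filenode.map");
    let mut filenodes = HashSet::with_capacity(num_mappings);
    for i in 0..num_mappings {
        let off = 8 + i * 8;
        // each mapping is two 4-byte OIDs; the second one is the filenode
        filenodes.insert(u32::from_le_bytes(buf[off + 4..off + 8].try_into()?));
    }
    Ok(filenodes)
}
```

A relfilenode found in this set is a (mapped) catalog even when it exceeds `FIRST_NORMAL_OBJECT_ID`.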
diff --git a/pageserver/src/tenant.rs b/pageserver/src/tenant.rs
index 1cf953068e..27c00a01ae 100644
--- a/pageserver/src/tenant.rs
+++ b/pageserver/src/tenant.rs
@@ -3393,6 +3393,14 @@ impl Tenant {
             )
         })?;
 
+        // TODO
+        // Do the pg_upgrade bits here.
+        // Rust is not the most convenient language for writing this,
+        // so we just run pg_upgrade in a subprocess.
+        // In the future we can turn this into an API call to some service that will do the work:
+        //
+        // 1. start postgres on the parent timeline at start_lsn, using neon_local (for now this is hardcoded)
+        // 2. run pg_upgrade with neon_local for the old version and the freshly created pgdata for the new version
         run_pg_upgrade(
             self.conf,
             &pgdata_path,
@@ -3407,25 +3415,26 @@ impl Tenant {
                 "Failed to pg_upgrade {timeline_id} with pg_version {pg_version} at {pgdata_path}"
             )
         })?;
-        // TODO
-        // do pg_upgrade bits here
-        // Rust is not the most convenient for writing this,
-        // So just call the bash script that relies on the pg_upgrade and neon_local to do all the work.
-        // In the future we can turn it into API call to some service that will do the work
-        // 1. start postgres on a parent timeline at the start_lsn, using neon_local (somehow)
-        // 2. run pg_upgrade using old neon_local and new neon_local (?) + new freshly created pgdata
-        // Or maybe use ad-hoc thing?
-        // We need old cluster read-only
-        // And new cluster read-write, but we will export it fully, so we can do whatever we want during upgrade
-
-        // TODO Do we need to adjust something else?
-        // Or should it be just start_lsn as it is?
-        let pgdata_lsn = (start_lsn + 1).align();
+        let controlfile_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
+        let start_lsn = start_lsn.align();
+        // Choose the max of controlfile_lsn and start_lsn.
+        //
+        // It is possible that controlfile_lsn is ahead of start_lsn,
+        // especially for small databases.
+        // In that case we need to start from controlfile_lsn:
+        // otherwise pages would carry LSNs larger than the LSN of the branch,
+        // and compute would fail when it tries to flush a page
+        // with an LSN larger than the branch LSN:
+        //
+        // ERROR: xlog flush request %X/%X is not satisfied --- flushed only to %X/%X
+        //
+        // There is another problem here: a gap between the
+        // branching LSN (where we diverged from the parent) and pgdata_lsn (the import LSN of the new timeline).
+        // We should teach WAL redo to skip all the records between these two points;
+        // otherwise the new timeline will see some updates from the parent timeline.
+        let pgdata_lsn = std::cmp::max(controlfile_lsn, start_lsn);
         assert!(pgdata_lsn.is_aligned());
-        // TODO
-        // We must have start_lsn+1 == pgdata_lsn
-        // Set it somehow
 
         // TODO why do we need these lines?
         let tenant_shard_id = uninitialized_timeline.owning_tenant.tenant_shard_id;
@@ -3442,6 +3451,7 @@ impl Tenant {
             &pgdata_path,
             pgdata_lsn,
             true,
+            Some(src_timeline),
             ctx,
         )
         .await
@@ -3667,6 +3677,7 @@ impl Tenant {
             &pgdata_path,
             pgdata_lsn,
             false,
+            None,
             ctx,
         )
         .await
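The numbered TODO in the first tenant.rs hunk describes the orchestration: start a read-only postgres on the parent timeline at `start_lsn`, then drive `pg_upgrade` against it and the freshly initialized new-version pgdata. Here is a sketch of the subprocess step only, using pg_upgrade's standard command-line flags; the function name and path plumbing are illustrative, not `run_pg_upgrade`'s real signature, and the neon_local wiring is out of scope:

```rust
use std::path::Path;
use std::process::Command;

/// Illustrative subprocess wrapper: run the new version's pg_upgrade binary
/// against the old cluster (read-only, on the parent timeline) and the new one.
fn run_pg_upgrade_subprocess(
    old_bindir: &Path,
    new_bindir: &Path,
    old_datadir: &Path,
    new_datadir: &Path,
) -> anyhow::Result<()> {
    let status = Command::new(new_bindir.join("pg_upgrade"))
        .arg("--old-bindir").arg(old_bindir)
        .arg("--new-bindir").arg(new_bindir)
        .arg("--old-datadir").arg(old_datadir)
        .arg("--new-datadir").arg(new_datadir)
        // pg_upgrade writes its log files relative to the working directory
        // (newer versions use pg_upgrade_output.d under the new datadir)
        .current_dir(new_datadir)
        .status()?;
    anyhow::ensure!(status.success(), "pg_upgrade failed: {status}");
    Ok(())
}
```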
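Why `max(controlfile_lsn, start_lsn)` instead of the old `(start_lsn + 1).align()`: pg_upgrade itself generates WAL while rebuilding the catalogs, so pages in the new pgdata can carry LSNs past the branch point, and importing below them is exactly what produces the quoted xlog-flush error. A toy illustration with `utils::lsn::Lsn` (the concrete values are made up; `align()` and `is_aligned()` are the same helpers the patch uses):

```rust
use utils::lsn::Lsn;

#[test]
fn pgdata_lsn_prefers_controlfile_lsn() {
    // Made-up values: the new cluster's pg_control is ahead of the branch
    // point because pg_upgrade wrote WAL of its own.
    let controlfile_lsn = Lsn(0x0169_9d70).align(); // from get_lsn_from_controlfile
    let start_lsn = Lsn(0x0169_8000).align(); // branch point on the parent timeline

    // Importing at anything lower would leave page-header LSNs above the
    // branch LSN, which the compute cannot flush.
    let pgdata_lsn = std::cmp::max(controlfile_lsn, start_lsn);
    assert_eq!(pgdata_lsn, Lsn(0x0169_9d70));
    assert!(pgdata_lsn.is_aligned());
}
```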