Choose max(branching_lsn, after_pg_upgrade_lsn) when importing new timeline data.

This fixes the "xlog flush request %X/%X is not satisfied" error.
See the comments in branch_timeline_impl().
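In outline, the fix amounts to the following. This is a minimal sketch, not the commit's literal code: Lsn here is a stand-in newtype for the pageserver's utils::lsn::Lsn, and align() is assumed to round up to the 8-byte WAL record alignment.

// Minimal sketch of the fix; Lsn is a stand-in for utils::lsn::Lsn.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
struct Lsn(u64);

impl Lsn {
    // Assumed semantics: round up to the 8-byte alignment of WAL records.
    fn align(self) -> Lsn {
        Lsn((self.0 + 7) & !7)
    }
}

// Previously the import LSN was derived only from the branch point, so pages
// rewritten by pg_upgrade could carry LSNs beyond it. Taking whichever LSN
// is further ahead avoids that.
fn choose_import_lsn(branching_lsn: Lsn, after_pg_upgrade_lsn: Lsn) -> Lsn {
    std::cmp::max(branching_lsn.align(), after_pg_upgrade_lsn.align())
}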
Anastasia Lubennikova
2024-09-13 04:24:46 +01:00
parent 168a6a87d7
commit 33ad0dc177
2 changed files with 74 additions and 31 deletions


@@ -55,6 +55,7 @@ pub async fn import_timeline_from_postgres_datadir(
pgdata_path: &Utf8Path,
pgdata_lsn: Lsn,
change_control_file_lsn: bool,
src_timeline: Option<&Timeline>,
ctx: &RequestContext,
) -> Result<()> {
let mut pg_control: Option<ControlFileData> = None;
@@ -101,6 +102,37 @@ pub async fn import_timeline_from_postgres_datadir(
}
}
// // if we're importing after pg_upgrade
// // also copy metadata for all relations that were not copied
// // from the parent timeline
// if let Some(src_timeline) = src_timeline {
// for ((spcnode, dbnode), _) in src_timeline
// .list_dbdirs(pgdata_lsn, ctx)
// .await
// .with_context(|| format!("Failed to list_dbdirs for src_timeline"))?
// {
// let rels = src_timeline
// .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to list_rels for src_timeline"))?;
// let new_rels = tline
// .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to list_rels for new_timeline"))?;
// for rel in rels {
// if !new_rels.contains(&rel) {
// let nblocks = src_timeline
// .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
// .await
// .with_context(|| format!("Failed to get_rel_size for src_timeline"))?;
// // TODO insert relation size into the new timeline's cache
// }
// }
// }
// }
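// // (one possible completion of this block is sketched after this file's diff)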
// We're done importing all the data files.
modification.commit(ctx).await?;
@@ -136,25 +168,27 @@ pub async fn import_timeline_from_postgres_datadir(
Ok(())
}
fn is_user_relfile(path: &Path) -> bool {
let filename = &path
.file_name()
.expect("missing rel filename")
.to_string_lossy();
let (relnode, _, _) = parse_relfilename(filename)
.map_err(|e| {
warn!("unrecognized file in postgres datadir: {:?} ({})", path, e);
e
})
.unwrap();
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
// THIS IS WRONG
// if catalog relation was vacuumed with vacuum full, it will have a new relfilenode
// which will be greater than FirstNormalObjectId
// Use pg_relfilemap to decide if the relation is a catalog relation
if relnode > pg_constants::FIRST_NORMAL_OBJECT_ID {
return true;
}
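The relmapper route hinted at above could look roughly like this. A hedged sketch, not part of the commit: it assumes PostgreSQL's on-disk RelMapFile layout from relmapper.c (int32 magic 0x592717, int32 num_mappings, then (oid, filenode) pairs in native byte order), and it only identifies mapped catalogs -- anything else would still need a pg_class lookup.

use std::collections::HashSet;
use std::path::Path;

// Sketch: collect relfilenodes of *mapped* catalog relations from a
// database directory's pg_filenode.map. A relfilenode found here is a
// catalog relation even if VACUUM FULL pushed it past FirstNormalObjectId.
fn mapped_catalog_filenodes(dbdir: &Path) -> std::io::Result<HashSet<u32>> {
    const RELMAPPER_FILEMAGIC: i32 = 0x592717;
    let buf = std::fs::read(dbdir.join("pg_filenode.map"))?;
    // Helper to read a native-endian int32 at a byte offset.
    let int32 = |off: usize| i32::from_ne_bytes(buf[off..off + 4].try_into().unwrap());
    assert_eq!(int32(0), RELMAPPER_FILEMAGIC, "unexpected relmapper magic");
    let num_mappings = int32(4) as usize;
    let mut filenodes = HashSet::with_capacity(num_mappings);
    for i in 0..num_mappings {
        // Each mapping is (Oid mapoid, Oid mapfilenode), 4 bytes apiece;
        // we only need the filenode half.
        filenodes.insert(int32(8 + i * 8 + 4) as u32);
    }
    Ok(filenodes)
}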
@@ -182,7 +216,6 @@ async fn import_rel(
e
})?;
let mut buf: [u8; 8192] = [0u8; 8192];
ensure!(len % BLCKSZ as usize == 0);
@@ -606,14 +639,14 @@ async fn import_file(
_ => {
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
// TODO Implement import_rel_from_old_version that will copy
// relation metadata and cached size from the parent timeline
if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
info!("after pg_restore skipping {:?}", file_path);
} else {
import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;
debug!("imported rel creation");
}
}
}
} else if file_path.starts_with("base") {
@@ -637,10 +670,9 @@ async fn import_file(
debug!("ignored PG_VERSION file");
}
_ => {
// if this is import after pg_upgrade, skip all user data files
// relfilenode > FirstNormalObjectId of the new cluster
if is_user_relfile(file_path) && new_checkpoint_lsn.is_some() {
info!("after pg_restore skipping {:?}", file_path);
} else {
import_rel(modification, file_path, spcnode, dbnode, reader, len, ctx).await?;

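As an aside, the commented-out block in this file could plausibly be finished along these lines. A hedged sketch: it reuses the Timeline methods the comment already calls (list_dbdirs, list_rels, get_rel_size) and the file's own types (Timeline, Version, RequestContext, Lsn); update_cached_rel_size is a hypothetical stand-in for the "TODO insert relation size into the new timeline's cache".

// Hedged sketch, not the commit's code: seed the new timeline's relation
// size cache for parent relations the import skipped.
async fn carry_over_skipped_rel_sizes(
    src_timeline: &Timeline,
    tline: &Timeline,
    pgdata_lsn: Lsn,
    ctx: &RequestContext,
) -> anyhow::Result<()> {
    for ((spcnode, dbnode), _) in src_timeline.list_dbdirs(pgdata_lsn, ctx).await? {
        let old_rels = src_timeline
            .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
            .await?;
        let new_rels = tline
            .list_rels(spcnode, dbnode, Version::Lsn(pgdata_lsn), ctx)
            .await?;
        for rel in old_rels.iter().copied().filter(|r| !new_rels.contains(r)) {
            let nblocks = src_timeline
                .get_rel_size(rel, Version::Lsn(pgdata_lsn), ctx)
                .await?;
            // Hypothetical helper: remember the size so the first access
            // on the new timeline doesn't have to fault it in.
            tline.update_cached_rel_size(rel, pgdata_lsn, nblocks);
        }
    }
    Ok(())
}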

@@ -3393,6 +3393,14 @@ impl Tenant {
)
})?;
// TODO
// do the pg_upgrade bits here.
// Rust is not the most convenient language for writing this,
// so we just call pg_upgrade in a subprocess.
// In the future we can turn this into an API call to a service that does the work.
//
// 1. start postgres on the parent timeline at the start_lsn, using neon_local (for now this is hardcoded)
// 2. run pg_upgrade using neon_local for the old version and the freshly created pgdata for the new version
run_pg_upgrade(
self.conf,
&pgdata_path,
@@ -3407,25 +3415,26 @@ impl Tenant {
"Failed to pg_upgrade {timeline_id} with pg_version {pg_version} at {pgdata_path}"
)
})?;
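For context, run_pg_upgrade can be as thin as a subprocess wrapper around the pg_upgrade binary. A hedged sketch with a made-up name and signature; only the pg_upgrade flags themselves (-b/-B for the old/new bindirs, -d/-D for the old/new data directories) are the standard CLI:

use std::path::Path;
use std::process::Command;

// Sketch only: spawn pg_upgrade and fail loudly on a nonzero exit.
fn run_pg_upgrade_sketch(
    old_bindir: &Path,
    new_bindir: &Path,
    old_datadir: &Path, // the compute started on the parent timeline (step 1)
    new_datadir: &Path, // the freshly created pgdata for the new version
) -> anyhow::Result<()> {
    let status = Command::new(new_bindir.join("pg_upgrade"))
        .arg("-b").arg(old_bindir)
        .arg("-B").arg(new_bindir)
        .arg("-d").arg(old_datadir)
        .arg("-D").arg(new_datadir)
        // run from the new pgdata's parent so pg_upgrade's output
        // directory lands somewhere predictable
        .current_dir(new_datadir.parent().unwrap())
        .status()?;
    anyhow::ensure!(status.success(), "pg_upgrade failed: {status}");
    Ok(())
}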
let controlfile_lsn = import_datadir::get_lsn_from_controlfile(&pgdata_path)?.align();
let start_lsn = start_lsn.align();
// Choose the max of controlfile_lsn and start_lsn.
//
// It is possible that the controlfile_lsn is ahead of the start_lsn,
// especially for small databases.
// In that case, we need to start from the controlfile_lsn;
// otherwise we will have pages with LSNs larger than the LSN of the branch,
// and compute will fail when it tries to flush a page
// with an LSN larger than the branch LSN:
//
// ERROR: xlog flush request %X/%X is not satisfied --- flushed only to %X/%X
//
// There is another problem here: a gap between the
// branching_lsn (where we diverged from the parent) and pgdata_lsn (the import LSN of the new timeline).
// We should teach wal-redo to skip all the records between these two points;
// otherwise we will see some updates from the parent timeline in the new timeline.
let pgdata_lsn = std::cmp::max(controlfile_lsn, start_lsn);
assert!(pgdata_lsn.is_aligned());
// TODO
// We must have start_lsn+1 == pgdata_lsn
// Set it somehow
// TODO why do we need these lines?
let tenant_shard_id = uninitialized_timeline.owning_tenant.tenant_shard_id;
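To make the comment above concrete, here is the failure mode with made-up numbers:

// Made-up numbers illustrating the failure mode described above.
fn main() {
    let start_lsn: u64 = 0x0169_F3E8; // branch point, already aligned
    let controlfile_lsn: u64 = 0x016A_0010; // checkpoint written by pg_upgrade
    // A page touched during pg_upgrade is stamped with an LSN near
    // controlfile_lsn. If the timeline started at start_lsn, flushing that
    // page would request "flush up to 0x16A0010" while the branch has WAL
    // only up to 0x169F3E8 -- the "xlog flush request is not satisfied" error.
    let pgdata_lsn = start_lsn.max(controlfile_lsn);
    assert_eq!(pgdata_lsn, controlfile_lsn); // small databases often hit this
}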
@@ -3442,6 +3451,7 @@ impl Tenant {
&pgdata_path,
pgdata_lsn,
true,
Some(src_timeline),
ctx,
)
.await
@@ -3667,6 +3677,7 @@ impl Tenant {
&pgdata_path,
pgdata_lsn,
false,
None,
ctx,
)
.await