diff --git a/pageserver/src/import_datadir.rs b/pageserver/src/import_datadir.rs index f56522b8ce..119c18fcce 100644 --- a/pageserver/src/import_datadir.rs +++ b/pageserver/src/import_datadir.rs @@ -192,19 +192,26 @@ fn import_rel( ensure!(len % pg_constants::BLCKSZ as usize == 0); let nblocks = len / pg_constants::BLCKSZ as usize; - if segno != 0 { - todo!(); - } - let rel = RelTag { spcnode: spcoid, dbnode: dboid, relnode, forknum, }; - modification.put_rel_creation(rel, nblocks as u32)?; let mut blknum: u32 = segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32); + + // Call put_rel_creation for every segment of the relation, + // because there is no guarantee about the order in which we are processing segments. + // ignore "relation already exists" error + if let Err(e) = modification.put_rel_creation(rel, nblocks as u32) { + if e.to_string().contains("already exists") { + info!("relation {} already exists. we must be extending it", rel); + } else { + return Err(e); + } + } + loop { let r = reader.read_exact(&mut buf); match r { @@ -216,7 +223,9 @@ fn import_rel( Err(err) => match err.kind() { std::io::ErrorKind::UnexpectedEof => { // reached EOF. That's expected. - ensure!(blknum == nblocks as u32, "unexpected EOF"); + let relative_blknum = + blknum - segno * (1024 * 1024 * 1024 / pg_constants::BLCKSZ as u32); + ensure!(relative_blknum == nblocks as u32, "unexpected EOF"); break; } _ => { @@ -227,6 +236,12 @@ fn import_rel( blknum += 1; } + // Update relation size + // + // If we process rel segments out of order, + // put_rel_extend will skip the update. + modification.put_rel_extend(rel, blknum)?; + Ok(()) } diff --git a/pageserver/src/pgdatadir_mapping.rs b/pageserver/src/pgdatadir_mapping.rs index 626ed1b0f1..59a53d68a1 100644 --- a/pageserver/src/pgdatadir_mapping.rs +++ b/pageserver/src/pgdatadir_mapping.rs @@ -749,6 +749,7 @@ impl<'a, R: Repository> DatadirModification<'a, R> { } /// Extend relation + /// If new size is smaller, do nothing. pub fn put_rel_extend(&mut self, rel: RelTag, nblocks: BlockNumber) -> Result<()> { ensure!(rel.relnode != 0, "invalid relnode"); @@ -756,10 +757,13 @@ impl<'a, R: Repository> DatadirModification<'a, R> { let size_key = rel_size_to_key(rel); let old_size = self.get(size_key)?.get_u32_le(); - let buf = nblocks.to_le_bytes(); - self.put(size_key, Value::Image(Bytes::from(buf.to_vec()))); + // only extend relation here. never decrease the size + if nblocks > old_size { + let buf = nblocks.to_le_bytes(); + self.put(size_key, Value::Image(Bytes::from(buf.to_vec()))); - self.pending_nblocks += nblocks as isize - old_size as isize; + self.pending_nblocks += nblocks as isize - old_size as isize; + } Ok(()) } diff --git a/test_runner/batch_others/test_import.py b/test_runner/batch_others/test_import.py index b913e2d9b4..2a916a5685 100644 --- a/test_runner/batch_others/test_import.py +++ b/test_runner/batch_others/test_import.py @@ -11,8 +11,11 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Put data in vanilla pg vanilla_pg.start() vanilla_pg.safe_psql("create user cloud_admin with password 'postgres' superuser") - vanilla_pg.safe_psql("create table t as select generate_series(1,300000)") - assert vanilla_pg.safe_psql('select count(*) from t') == [(300000, )] + vanilla_pg.safe_psql('''create table t as select 'long string to consume some space' || g + from generate_series(1,30000000) g''') + assert vanilla_pg.safe_psql('select count(*) from t') == [(30000000, )] + # ensure that relation is larger than 1GB to test multisegment restore + assert vanilla_pg.safe_psql("select pg_relation_size('t')")[0][0] > 1024 * 1024 * 1024 # Take basebackup basebackup_dir = os.path.join(test_output_dir, "basebackup") @@ -62,4 +65,4 @@ def test_import_from_vanilla(test_output_dir, pg_bin, vanilla_pg, neon_env_build # Check it worked pg = env.postgres.create_start(node_name, tenant_id=tenant) - assert pg.safe_psql('select count(*) from t') == [(300000, )] + assert pg.safe_psql('select count(*) from t') == [(30000000, )]