Merge branch 'hack/fast-import' of github.com:neondatabase/neon into hack/fast-import

This commit is contained in:
Stas Kelvich
2024-09-12 19:26:16 +01:00
2 changed files with 97 additions and 12 deletions

View File

@@ -13,6 +13,7 @@ use utils::{id::{NodeId, TenantId, TimelineId}, shard::{ShardCount, ShardNumber,
use walkdir::WalkDir;
use crate::{context::{DownloadBehavior, RequestContext}, pgdatadir_mapping::{DbDirectory, RelDirectory}, task_mgr::TaskKind, tenant::storage_layer::ImageLayerWriter};
use crate::pgdatadir_mapping::{SlruSegmentDirectory, TwoPhaseDirectory};
use crate::config::PageServerConf;
use tokio::io::AsyncReadExt;
@@ -25,8 +26,12 @@ use crate::tenant::remote_timeline_client;
use crate::tenant::remote_timeline_client::LayerFileMetadata;
use pageserver_api::shard::ShardIndex;
use pageserver_api::key::Key;
use pageserver_api::reltag::SlruKind;
use pageserver_api::key::{slru_block_to_key, slru_dir_to_key, slru_segment_size_to_key, TWOPHASEDIR_KEY, CONTROLFILE_KEY, CHECKPOINT_KEY};
use utils::bin_ser::BeSer;
use std::collections::HashSet;
pub struct PgImportEnv {
ctx: RequestContext,
conf: &'static PageServerConf,
@@ -65,7 +70,11 @@ impl PgImportEnv {
pub async fn import_datadir(&mut self, pgdata_path: &Utf8PathBuf) -> anyhow::Result<()> {
// Read control file
let control_file = self.import_controlfile(pgdata_path).await?;
let controlfile_path = pgdata_path.join("global").join("pg_control");
let controlfile_buf = std::fs::read(&controlfile_path)
.with_context(|| format!("reading controlfile: {controlfile_path}"))?;
let control_file = ControlFileData::decode(&controlfile_buf)?;
let pgdata_lsn = Lsn(control_file.checkPoint).align();
let timeline_path = self.conf.timeline_path(&self.tsi, &self.tli);
@@ -92,6 +101,28 @@ impl PgImportEnv {
self.import_db(&mut one_big_layer, &db).await?;
}
// Import SLRUs
// pg_xact (01:00 keyspace)
self.import_slru(&mut one_big_layer, SlruKind::Clog, &pgdata_path.join("pg_xact")).await?;
// pg_multixact/members (01:01 keyspace)
self.import_slru(&mut one_big_layer, SlruKind::MultiXactMembers, &pgdata_path.join("pg_multixact/members")).await?;
// pg_multixact/offsets (01:02 keyspace)
self.import_slru(&mut one_big_layer, SlruKind::MultiXactOffsets, &pgdata_path.join("pg_multixact/offsets")).await?;
// Import pg_twophase.
// TODO: currently imported as an empty directory; prepared transactions are not carried over.
let twophasedir_buf = TwoPhaseDirectory::ser(
&TwoPhaseDirectory { xids: HashSet::new() }
)?;
one_big_layer.put_image(TWOPHASEDIR_KEY, Bytes::from(twophasedir_buf), &self.ctx).await?;
// Controlfile, checkpoint
one_big_layer.put_image(CONTROLFILE_KEY, Bytes::from(controlfile_buf), &self.ctx).await?;
let checkpoint_buf = control_file.checkPointCopy.encode()?;
one_big_layer.put_image(CHECKPOINT_KEY, checkpoint_buf, &self.ctx).await?;
let layerdesc = one_big_layer.finish_raw(&self.ctx).await?;
// should we do anything about the WAL?
@@ -116,13 +147,6 @@ impl PgImportEnv {
Ok(())
}
/// Read and decode the PostgreSQL control file (`global/pg_control`)
/// from the given data directory.
async fn import_controlfile(&mut self, pgdata_path: &Utf8Path) -> anyhow::Result<ControlFileData> {
    let path = pgdata_path.join("global").join("pg_control");
    let bytes = std::fs::read(&path)
        .with_context(|| format!("reading controlfile: {path}"))?;
    ControlFileData::decode(&bytes)
}
async fn import_db(
&mut self,
layer_writer: &mut ImageLayerWriter,
@@ -213,6 +237,67 @@ impl PgImportEnv {
Ok(())
}
/// Import one SLRU directory (pg_xact, pg_multixact/members, or
/// pg_multixact/offsets) into the layer: an `SlruSegmentDirectory` image
/// listing the segments, one image per 8 KiB block of each segment, and a
/// per-segment size (in pages) image.
async fn import_slru(
    &mut self,
    layer_writer: &mut ImageLayerWriter,
    kind: SlruKind,
    path: &Utf8PathBuf,
) -> anyhow::Result<()> {
    // Collect the segment files directly under `path`. SLRU segment file
    // names are the segment number in hex; anything that does not parse as
    // hex is skipped. min_depth(1) excludes the root directory entry itself
    // (WalkDir yields it at depth 0), and the file-type check excludes any
    // subdirectory that happens to have a hex name.
    let segments: Vec<(String, u32)> = WalkDir::new(path)
        .min_depth(1)
        .max_depth(1)
        .into_iter()
        .filter_map(|entry| {
            let entry = entry.ok()?;
            if !entry.file_type().is_file() {
                return None;
            }
            let filename = entry.file_name();
            let filename = filename.to_string_lossy();
            let segno = u32::from_str_radix(&filename, 16).ok()?;
            Some((filename.to_string(), segno))
        }).collect();

    // Write the SlruDir first: the set of segment numbers that exist.
    let slrudir_key = slru_dir_to_key(kind);
    let segnos: HashSet<u32> = segments.iter().map(|(_path, segno)| *segno).collect();
    let slrudir = SlruSegmentDirectory {
        segments: segnos,
    };
    let slrudir_buf = SlruSegmentDirectory::ser(&slrudir)?;
    layer_writer.put_image(slrudir_key, slrudir_buf.into(), &self.ctx).await?;

    for (segpath, segno) in segments {
        // One image per 8 KiB block of the segment.
        let p = path.join(Utf8PathBuf::from(segpath));
        let mut reader = tokio::fs::File::open(&p).await
            .context(format!("opening {}", &p))?;
        let mut rpageno = 0;
        loop {
            // A fresh buffer per page: put_image takes ownership of the bytes.
            let mut buf: Vec<u8> = vec![0u8; 8192];
            match reader.read_exact(&mut buf).await {
                Ok(_) => {},
                Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => {
                    // Reached EOF. That's expected: segments are a whole
                    // number of pages. NOTE(review): a trailing partial page
                    // would be silently dropped here — assumed not to occur.
                    break;
                }
                Err(err) => {
                    bail!("error reading file {}: {:#}", &p, err);
                }
            };
            let slruseg_key = slru_block_to_key(kind, segno, rpageno);
            layer_writer.put_image(slruseg_key, Bytes::from(buf), &self.ctx).await?;
            rpageno += 1;
        }
        let npages: u32 = rpageno;

        // Followed by SlruSegSize: the number of pages in the segment.
        let segsize_key = slru_segment_size_to_key(kind, segno);
        let segsize_buf = npages.to_le_bytes();
        layer_writer.put_image(segsize_key, Bytes::copy_from_slice(&segsize_buf), &self.ctx).await?;
    }
    Ok(())
}
async fn create_index_part(&mut self, layers: &[PersistentLayerDesc], control_file: &ControlFileData) -> anyhow::Result<()> {
let dstdir = &self.conf.workdir;

View File

@@ -1988,8 +1988,8 @@ pub struct DbDirectory {
}
#[derive(Debug, Serialize, Deserialize)]
struct TwoPhaseDirectory {
xids: HashSet<TransactionId>,
pub(crate) struct TwoPhaseDirectory {
pub(crate) xids: HashSet<TransactionId>,
}
#[derive(Debug, Serialize, Deserialize, Default)]
@@ -2022,9 +2022,9 @@ struct RelSizeEntry {
}
#[derive(Debug, Serialize, Deserialize, Default)]
struct SlruSegmentDirectory {
pub(crate) struct SlruSegmentDirectory {
// Set of SLRU segments that exist.
segments: HashSet<u32>,
pub(crate) segments: HashSet<u32>,
}
#[derive(Copy, Clone, PartialEq, Eq, Debug, enum_map::Enum)]