mirror of
https://github.com/neondatabase/neon.git
synced 2026-01-06 04:52:55 +00:00
code cleanup for compute_node_rebase branch
@@ -230,7 +230,7 @@ fn init_logging(conf: &PageServerConf) -> slog_scope::GlobalLoggerGuard {
         if record.level().is_at_least(slog::Level::Info) {
             return true;
         }
-        return true;
+        return false;
     });
     let drain = std::sync::Mutex::new(drain).fuse();
     let logger = slog::Logger::root(drain, slog::o!());
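The change above fixes the filter's fall-through: the old closure returned true on every path, so nothing was ever filtered; returning false now drops records below Info. A minimal sketch of the same drain setup (an assumption-laden sketch, not the commit's exact surrounding code; slog_term is assumed for the decorator):

    use slog::Drain;

    fn make_logger() -> slog::Logger {
        let decorator = slog_term::TermDecorator::new().build();
        let drain = slog_term::FullFormat::new(decorator).build();
        // Keep only records at Info and above; the closure's final path
        // must return false or the filter is a no-op.
        let drain = slog::Filter::new(drain, |record: &slog::Record| {
            record.level().is_at_least(slog::Level::Info)
        });
        let drain = std::sync::Mutex::new(drain).fuse();
        slog::Logger::root(drain, slog::o!())
    }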
@@ -1,8 +1,8 @@
 #![allow(non_camel_case_types)]
 #![allow(non_snake_case)]
 
-use std::io::prelude::*;
 use std::fs::File;
+use std::io::prelude::*;
 use std::io::SeekFrom;
 
 use bytes::{Buf, Bytes};
@@ -11,86 +11,79 @@ use log::*;
 
 type XLogRecPtr = u64;
 
 
 #[repr(C)]
-#[derive(Debug)]
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 /*
  * Body of CheckPoint XLOG records. This is declared here because we keep
  * a copy of the latest one in pg_control for possible disaster recovery.
  * Changing this struct requires a PG_CONTROL_VERSION bump.
  */
 pub struct CheckPoint {
-    pub redo: XLogRecPtr, /* next RecPtr available when we began to
-                           * create CheckPoint (i.e. REDO start point) */
-    pub ThisTimeLineID: u32, /* current TLI */
-    pub PrevTimeLineID: u32, /* previous TLI, if this record begins a new
-                              * timeline (equals ThisTimeLineID otherwise) */
-    pub fullPageWrites: bool, /* current full_page_writes */
-    pub nextXid: u64, /* next free transaction ID */
-    pub nextOid: u32, /* next free OID */
-    pub nextMulti: u32, /* next free MultiXactId */
-    pub nextMultiOffset: u32, /* next free MultiXact offset */
-    pub oldestXid: u32, /* cluster-wide minimum datfrozenxid */
-    pub oldestXidDB: u32, /* database with minimum datfrozenxid */
-    pub oldestMulti: u32, /* cluster-wide minimum datminmxid */
-    pub oldestMultiDB: u32, /* database with minimum datminmxid */
-    pub time: u64, /* time stamp of checkpoint */
-    pub oldestCommitTsXid: u32, /* oldest Xid with valid commit
-                                 * timestamp */
-    pub newestCommitTsXid: u32, /* newest Xid with valid commit
-                                 * timestamp */
+    pub redo: XLogRecPtr, /* next RecPtr available when we began to
+                           * create CheckPoint (i.e. REDO start point) */
+    pub ThisTimeLineID: u32, /* current TLI */
+    pub PrevTimeLineID: u32, /* previous TLI, if this record begins a new
+                              * timeline (equals ThisTimeLineID otherwise) */
+    pub fullPageWrites: bool, /* current full_page_writes */
+    pub nextXid: u64, /* next free transaction ID */
+    pub nextOid: u32, /* next free OID */
+    pub nextMulti: u32, /* next free MultiXactId */
+    pub nextMultiOffset: u32, /* next free MultiXact offset */
+    pub oldestXid: u32, /* cluster-wide minimum datfrozenxid */
+    pub oldestXidDB: u32, /* database with minimum datfrozenxid */
+    pub oldestMulti: u32, /* cluster-wide minimum datminmxid */
+    pub oldestMultiDB: u32, /* database with minimum datminmxid */
+    pub time: u64, /* time stamp of checkpoint */
+    pub oldestCommitTsXid: u32, /* oldest Xid with valid commit
+                                 * timestamp */
+    pub newestCommitTsXid: u32, /* newest Xid with valid commit
+                                 * timestamp */
 
-    /*
-     * Oldest XID still running. This is only needed to initialize hot standby
-     * mode from an online checkpoint, so we only bother calculating this for
-     * online checkpoints and only when wal_level is replica. Otherwise it's
-     * set to InvalidTransactionId.
-     */
-    pub oldestActiveXid: u32,
+    /*
+     * Oldest XID still running. This is only needed to initialize hot standby
+     * mode from an online checkpoint, so we only bother calculating this for
+     * online checkpoints and only when wal_level is replica. Otherwise it's
+     * set to InvalidTransactionId.
+     */
+    pub oldestActiveXid: u32,
 }
 
 #[repr(C)]
-#[derive(Debug)]
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 pub struct ControlFileDataZenith {
     pub system_identifier: u64,
-    pg_control_version: u32, /* PG_CONTROL_VERSION */
-    catalog_version_no: u32, /* see catversion.h */
+    pg_control_version: u32, /* PG_CONTROL_VERSION */
+    catalog_version_no: u32, /* see catversion.h */
 
-    state: i32, /* see enum above */
-    time: i64, /* time stamp of last pg_control update */
-    pub checkPoint: XLogRecPtr,
+    state: i32, /* see enum above */
+    time: i64, /* time stamp of last pg_control update */
+    pub checkPoint: XLogRecPtr,
     checkPointCopy: CheckPoint, /* copy of last check point record */
-    unloggedLSN: XLogRecPtr, /* current fake LSN value, for unlogged rels */
-    minRecoveryPoint: XLogRecPtr,
-    minRecoveryPointTLI: u32,
-    backupStartPoint: XLogRecPtr,
-    backupEndPoint: XLogRecPtr,
-    backupEndRequired: bool
+    unloggedLSN: XLogRecPtr, /* current fake LSN value, for unlogged rels */
+    minRecoveryPoint: XLogRecPtr,
+    minRecoveryPointTLI: u32,
+    backupStartPoint: XLogRecPtr,
+    backupEndPoint: XLogRecPtr,
+    backupEndRequired: bool,
 }
 
 impl ControlFileDataZenith {
-    pub fn new() -> ControlFileDataZenith
-    {
+    pub fn new() -> ControlFileDataZenith {
         ControlFileDataZenith {
-            system_identifier: 0,
+            system_identifier: 0,
             pg_control_version: 0,
-            catalog_version_no: 0,
+            catalog_version_no: 0,
             state: 0,
             time: 0,
             checkPoint: 0,
-            checkPointCopy:
-            {
-                CheckPoint
-                {
+            checkPointCopy: {
+                CheckPoint {
                     redo: 0,
                     ThisTimeLineID: 0,
                     PrevTimeLineID: 0,
                     fullPageWrites: false,
                     nextXid: 0,
-                    nextOid:0,
+                    nextOid: 0,
                     nextMulti: 0,
                     nextMultiOffset: 0,
                     oldestXid: 0,
@@ -100,109 +93,113 @@ impl ControlFileDataZenith {
                     time: 0,
                     oldestCommitTsXid: 0,
                     newestCommitTsXid: 0,
-                    oldestActiveXid:0
+                    oldestActiveXid: 0,
                 }
             },
-            unloggedLSN: 0,
-            minRecoveryPoint: 0,
-            minRecoveryPointTLI: 0,
-            backupStartPoint: 0,
-            backupEndPoint: 0,
-            backupEndRequired: false,
+            unloggedLSN: 0,
+            minRecoveryPoint: 0,
+            minRecoveryPointTLI: 0,
+            backupStartPoint: 0,
+            backupEndPoint: 0,
+            backupEndRequired: false,
         }
     }
 }
 
 pub fn decode_pg_control(mut buf: Bytes) -> ControlFileDataZenith {
-
     info!("decode pg_control");
 
-    let controlfile : ControlFileDataZenith = ControlFileDataZenith {
-        system_identifier: buf.get_u64_le(),
-        pg_control_version: buf.get_u32_le(),
-        catalog_version_no: buf.get_u32_le(),
-        state: buf.get_i32_le(),
-        time: { buf.advance(4); buf.get_i64_le() },
-        checkPoint: buf.get_u64_le(),
-        checkPointCopy:
-        {
-            CheckPoint
-            {
-                redo: buf.get_u64_le(),
-                ThisTimeLineID: buf.get_u32_le(),
-                PrevTimeLineID: buf.get_u32_le(),
-                fullPageWrites: buf.get_u8() != 0,
-                nextXid: { buf.advance(7); buf.get_u64_le()},
-                nextOid: buf.get_u32_le(),
-                nextMulti: buf.get_u32_le(),
-                nextMultiOffset: buf.get_u32_le(),
-                oldestXid:buf.get_u32_le(),
-                oldestXidDB: buf.get_u32_le(),
-                oldestMulti: buf.get_u32_le(),
-                oldestMultiDB: buf.get_u32_le(),
-                time: { buf.advance(4); buf.get_u64_le()},
-                oldestCommitTsXid: buf.get_u32_le(),
-                newestCommitTsXid: buf.get_u32_le(),
-                oldestActiveXid:buf.get_u32_le()
-            }
-        },
-        unloggedLSN: buf.get_u64_le(),
-        minRecoveryPoint: buf.get_u64_le(),
-        minRecoveryPointTLI: buf.get_u32_le(),
-        backupStartPoint:{ buf.advance(4); buf.get_u64_le()},
-        backupEndPoint: buf.get_u64_le(),
-        backupEndRequired: buf.get_u8() != 0,
-    };
+    let controlfile: ControlFileDataZenith = ControlFileDataZenith {
+        system_identifier: buf.get_u64_le(),
+        pg_control_version: buf.get_u32_le(),
+        catalog_version_no: buf.get_u32_le(),
+        state: buf.get_i32_le(),
+        time: {
+            buf.advance(4);
+            buf.get_i64_le()
+        },
+        checkPoint: buf.get_u64_le(),
+        checkPointCopy: {
+            CheckPoint {
+                redo: buf.get_u64_le(),
+                ThisTimeLineID: buf.get_u32_le(),
+                PrevTimeLineID: buf.get_u32_le(),
+                fullPageWrites: buf.get_u8() != 0,
+                nextXid: {
+                    buf.advance(7);
+                    buf.get_u64_le()
+                },
+                nextOid: buf.get_u32_le(),
+                nextMulti: buf.get_u32_le(),
+                nextMultiOffset: buf.get_u32_le(),
+                oldestXid: buf.get_u32_le(),
+                oldestXidDB: buf.get_u32_le(),
+                oldestMulti: buf.get_u32_le(),
+                oldestMultiDB: buf.get_u32_le(),
+                time: {
+                    buf.advance(4);
+                    buf.get_u64_le()
+                },
+                oldestCommitTsXid: buf.get_u32_le(),
+                newestCommitTsXid: buf.get_u32_le(),
+                oldestActiveXid: buf.get_u32_le(),
+            }
+        },
+        unloggedLSN: buf.get_u64_le(),
+        minRecoveryPoint: buf.get_u64_le(),
+        minRecoveryPointTLI: buf.get_u32_le(),
+        backupStartPoint: {
+            buf.advance(4);
+            buf.get_u64_le()
+        },
+        backupEndPoint: buf.get_u64_le(),
+        backupEndRequired: buf.get_u8() != 0,
+    };
 
-    return controlfile;
+    return controlfile;
 }
 
-pub fn parse_controlfile(b: Bytes)
-{
+pub fn parse_controlfile(b: Bytes) {
     let controlfile = decode_pg_control(b);
 
-    info!("controlfile {:X}/{:X}",
-          controlfile.checkPoint >> 32, controlfile.checkPoint);
+    info!(
+        "controlfile {:X}/{:X}",
+        controlfile.checkPoint >> 32,
+        controlfile.checkPoint
+    );
     info!("controlfile {:?}", controlfile);
 }
 
 const MAX_MAPPINGS: usize = 62;
 
 #[derive(Debug)]
-struct RelMapping
-{
-    mapoid: u32, /* OID of a catalog */
-    mapfilenode: u32 /* its filenode number */
+struct RelMapping {
+    mapoid: u32, /* OID of a catalog */
+    mapfilenode: u32, /* its filenode number */
 }
 
 #[derive(Debug)]
-pub struct RelMapFile
-{
-    magic: i32, /* always RELMAPPER_FILEMAGIC */
-    num_mappings: i32, /* number of valid RelMapping entries */
-    mappings: [u8; MAX_MAPPINGS*8],
-    crc: u32, /* CRC of all above */
-    pad: i32 /* to make the struct size be 512 exactly */
+pub struct RelMapFile {
+    magic: i32, /* always RELMAPPER_FILEMAGIC */
+    num_mappings: i32, /* number of valid RelMapping entries */
+    mappings: [u8; MAX_MAPPINGS * 8],
+    crc: u32, /* CRC of all above */
+    pad: i32, /* to make the struct size be 512 exactly */
 }
 
 pub fn decode_filemapping(mut buf: Bytes) -> RelMapFile {
-
     info!("decode filemap");
 
-    let file : RelMapFile = RelMapFile {
-        magic: buf.get_i32_le(), /* always RELMAPPER_FILEMAGIC */
-        num_mappings: buf.get_i32_le(), /* number of valid RelMapping entries */
-        mappings: {
-            let mut arr = [0 as u8; MAX_MAPPINGS*8];
+    let file: RelMapFile = RelMapFile {
+        magic: buf.get_i32_le(), /* always RELMAPPER_FILEMAGIC */
+        num_mappings: buf.get_i32_le(), /* number of valid RelMapping entries */
+        mappings: {
+            let mut arr = [0 as u8; MAX_MAPPINGS * 8];
             buf.copy_to_slice(&mut arr);
             arr
-        }
-        ,
-        crc: buf.get_u32_le(), /* CRC of all above */
-        pad: buf.get_i32_le()
+        },
+        crc: buf.get_u32_le(), /* CRC of all above */
+        pad: buf.get_i32_le(),
     };
 
     info!("decode filemap {:?}", file);
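A quick size check on the layout above (a sketch, not part of the commit; PostgreSQL's relmapper file is exactly 512 bytes): 4 (magic) + 4 (num_mappings) + 62 * 8 (mappings) + 4 (crc) + 4 (pad) = 512, which is why the restore path later in this diff consumes pg_filenode.map in 512-byte units.

    // Hypothetical compile-time check, with MAX_MAPPINGS = 62 as above.
    const RELMAPFILE_SIZE: usize = 4 + 4 + MAX_MAPPINGS * 8 + 4 + 4;
    const _: () = assert!(RELMAPFILE_SIZE == 512);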
@@ -210,13 +207,12 @@ pub fn decode_filemapping(mut buf: Bytes) -> RelMapFile {
 }
 
 pub fn write_buf_to_file(filepath: String, buf: Bytes, blkno: u32) {
-
     info!("write_buf_to_file {}", filepath.clone());
 
     let mut buffer = File::create(filepath.clone()).unwrap();
-    buffer.seek(SeekFrom::Start(8192*blkno as u64)).unwrap();
+    buffer.seek(SeekFrom::Start(8192 * blkno as u64)).unwrap();
 
     buffer.write_all(&buf).unwrap();
 
     info!("DONE write_buf_to_file {}", filepath);
 }
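For orientation, a minimal sketch of driving the decoder above from a file on disk (a hypothetical helper, not part of the commit; error handling elided):

    use bytes::Bytes;

    fn dump_controlfile(path: &str) {
        // pg_control is a small fixed-layout file, e.g. global/pg_control.
        let data = std::fs::read(path).unwrap();
        parse_controlfile(Bytes::from(data));
    }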
@@ -1,12 +1,11 @@
 use std::net::SocketAddr;
 use std::path::PathBuf;
 
-#[allow(dead_code)]
-
-pub mod pg_constants;
 pub mod controlfile;
 pub mod page_cache;
 pub mod page_service;
+#[allow(dead_code)]
+pub mod pg_constants;
 pub mod restore_s3;
 pub mod tui;
 pub mod tui_event;
File diff suppressed because it is too large
@@ -374,7 +374,6 @@ impl Connection {
                 self.stream.write_u32(resp.n_blocks).await?;
                 self.stream.write_buf(&mut resp.page.clone()).await?;
             }
-
         }
 
         Ok(())
@@ -429,7 +428,6 @@ impl Connection {
         trace!("got query {:?}", q.body);
 
         if q.body.starts_with(b"file") {
-
             let (_l, r) = q.body.split_at("file ".len());
             //TODO parse it correctly
             let r = r.to_vec();
@@ -439,23 +437,44 @@ impl Connection {
             let mut s;
 
             let filepath = split.next().unwrap();
-            let sysid = { s = split.next().unwrap(); s.parse::<u64>().unwrap()};
+            let sysid = {
+                s = split.next().unwrap();
+                s.parse::<u64>().unwrap()
+            };
 
             let buf_tag = page_cache::BufferTag {
-                spcnode: { s = split.next().unwrap(); s.parse::<u32>().unwrap() },
-                dbnode: { s = split.next().unwrap(); s.parse::<u32>().unwrap() },
-                relnode: { s = split.next().unwrap(); s.parse::<u32>().unwrap() },
-                forknum: { s = split.next().unwrap(); s.parse::<u8>().unwrap() },
-                blknum: { s = split.next().unwrap(); s.parse::<u32>().unwrap() }
+                spcnode: {
+                    s = split.next().unwrap();
+                    s.parse::<u32>().unwrap()
+                },
+                dbnode: {
+                    s = split.next().unwrap();
+                    s.parse::<u32>().unwrap()
+                },
+                relnode: {
+                    s = split.next().unwrap();
+                    s.parse::<u32>().unwrap()
+                },
+                forknum: {
+                    s = split.next().unwrap();
+                    s.parse::<u8>().unwrap()
+                },
+                blknum: {
+                    s = split.next().unwrap();
+                    s.parse::<u32>().unwrap()
+                },
             };
 
             //TODO PARSE LSN
             //let lsn = { s = split.next().unwrap(); s.parse::<u64>().unwrap()};
             let lsn: u64 = 0;
-            info!("process file query sysid {} -- {:?} lsn {}",sysid, buf_tag, lsn);
+            info!(
+                "process file query sysid {} -- {:?} lsn {}",
+                sysid, buf_tag, lsn
+            );
 
-            self.handle_file(filepath.to_string(), sysid, buf_tag, lsn.into()).await
+            self.handle_file(filepath.to_string(), sysid, buf_tag, lsn.into())
+                .await
         } else if q.body.starts_with(b"pagestream ") {
             let (_l, r) = q.body.split_at("pagestream ".len());
             let mut r = r.to_vec();
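Reading the split order above, the ad-hoc "file" query evidently carries space-separated fields: path, system id, then the five BufferTag components (spcnode, dbnode, relnode, forknum, blknum). A hypothetical example of such a query string (every value made up for illustration):

    // file <path> <sysid> <spcnode> <dbnode> <relnode> <forknum> <blknum>
    let q = "file /tmp/page.bin 7061963537234036738 1663 13008 16384 0 3";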
@@ -502,9 +521,13 @@ impl Connection {
         }
     }
 
-    async fn handle_file(&mut self, filepath: String, sysid:u64,
-                         buf_tag: page_cache::BufferTag, lsn:u64) -> Result<()> {
-
+    async fn handle_file(
+        &mut self,
+        filepath: String,
+        sysid: u64,
+        buf_tag: page_cache::BufferTag,
+        lsn: u64,
+    ) -> Result<()> {
         let pcache = page_cache::get_pagecache(self.conf.clone(), sysid);
 
         match pcache.get_page_at_lsn(buf_tag, lsn) {
@@ -512,16 +535,17 @@ impl Connection {
                 info!("info succeeded get_page_at_lsn: {}", lsn);
 
                 controlfile::write_buf_to_file(filepath, p, buf_tag.blknum);
-            },
+            }
             Err(e) => {
                 info!("page not found and it's ok. get_page_at_lsn: {}", e);
             }
         };
 
-        self.write_message_noflush(&BeMessage::RowDescription).await?;
+        self.write_message_noflush(&BeMessage::RowDescription)
+            .await?;
         self.write_message_noflush(&BeMessage::DataRow).await?;
-        self.write_message_noflush(&BeMessage::CommandComplete).await?;
+        self.write_message_noflush(&BeMessage::CommandComplete)
+            .await?;
         self.write_message(&BeMessage::ReadyForQuery).await
     }
 
@@ -588,7 +612,7 @@ impl Connection {
 
             let n_blocks = pcache.relsize_get(&tag);
 
-            info!("ZenithNblocksRequest {:?} = {}", tag, n_blocks);
+            trace!("ZenithNblocksRequest {:?} = {}", tag, n_blocks);
             self.write_message(&BeMessage::ZenithNblocksResponse(ZenithStatusResponse {
                 ok: true,
                 n_blocks: n_blocks,
@@ -608,26 +632,23 @@ impl Connection {
             Ok(p) => {
                 let mut b = BytesMut::with_capacity(8192);
 
-                info!("ZenithReadResponse get_page_at_lsn succeed");
-                if p.len() < 8192
-                {
+                trace!("ZenithReadResponse get_page_at_lsn succeed");
+                if p.len() < 8192 {
                     //add padding
-                    info!("ZenithReadResponse add padding");
+                    trace!("ZenithReadResponse add padding");
                     let padding: [u8; 8192 - 512] = [0; 8192 - 512];
                     b.extend_from_slice(&p);
                     b.extend_from_slice(&padding);
-                }
-                else
-                {
+                } else {
                     b.extend_from_slice(&p);
                 }
 
                 BeMessage::ZenithReadResponse(ZenithReadResponse {
                     ok: true,
                     n_blocks: 0,
-                    page: b.freeze()
+                    page: b.freeze(),
                 })
-            },
+            }
             Err(e) => {
                 const ZERO_PAGE: [u8; 8192] = [0; 8192];
                 error!("get_page_at_lsn: {}", e);
@@ -648,7 +669,7 @@ impl Connection {
                 relnode: req.relnode,
                 forknum: req.forknum,
             };
-            info!("ZenithCreateRequest {:?}", tag);
+            trace!("ZenithCreateRequest {:?}", tag);
 
             pcache.relsize_inc(&tag, None);
 
@@ -666,7 +687,7 @@ impl Connection {
                 forknum: req.forknum,
             };
 
-            info!("ZenithExtendRequest {:?} to {}", tag, req.blkno);
+            trace!("ZenithExtendRequest {:?} to {}", tag, req.blkno);
 
             pcache.relsize_inc(&tag, Some(req.blkno));
 
@@ -1,13 +1,11 @@
-
 // From pg_tablespace_d.h
 //
 // FIXME: we'll probably need these elsewhere too, move to some common location
-pub const DEFAULTTABLESPACE_OID:u32 = 1663;
-pub const GLOBALTABLESPACE_OID:u32 = 1664;
+pub const DEFAULTTABLESPACE_OID: u32 = 1663;
+pub const GLOBALTABLESPACE_OID: u32 = 1664;
 //Special values for non-rel files' tags
 //TODO maybe use enum?
-pub const PG_CONTROLFILE_FORKNUM:u32 = 42;
-pub const PG_FILENODEMAP_FORKNUM:u32 = 43;
-pub const PG_XACT_FORKNUM:u32 = 44;
-pub const PG_MXACT_OFFSETS_FORKNUM:u32 = 45;
-pub const PG_MXACT_MEMBERS_FORKNUM:u32 = 46;
+pub const PG_CONTROLFILE_FORKNUM: u32 = 42;
+pub const PG_FILENODEMAP_FORKNUM: u32 = 43;
+pub const PG_XACT_FORKNUM: u32 = 44;
+pub const PG_MXACT_OFFSETS_FORKNUM: u32 = 45;
+pub const PG_MXACT_MEMBERS_FORKNUM: u32 = 46;
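These fork numbers sit well outside PostgreSQL's real fork range (0-3), so non-relation files — pg_control, the relmapper file, CLOG and MultiXact segments — can be keyed in the same per-page cache as relation pages, as the restore code later in this diff does. A hypothetical tag for page zero of the control file (BufferTag fields as used elsewhere in this diff; note forknum is stored as u8 there, and the spcnode choice here is an assumption):

    let tag = page_cache::BufferTag {
        spcnode: pg_constants::GLOBALTABLESPACE_OID,
        dbnode: 0,
        relnode: 0,
        forknum: pg_constants::PG_CONTROLFILE_FORKNUM as u8,
        blknum: 0,
    };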
@@ -22,7 +22,7 @@ use tokio::runtime;
 
 use futures::future;
 
-use crate::{PageServerConf, page_cache, pg_constants, controlfile};
+use crate::{controlfile, page_cache, pg_constants, PageServerConf};
 
 struct Storage {
     region: Region,
@@ -86,7 +86,12 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
 
     //Before uploading other files, slurp pg_control to set systemid
 
-    let control_results: Vec<s3::serde_types::ListBucketResult> = bucket.list("relationdata/global/pg_control".to_string(), Some("".to_string())).await?;
+    let control_results: Vec<s3::serde_types::ListBucketResult> = bucket
+        .list(
+            "relationdata/global/pg_control".to_string(),
+            Some("".to_string()),
+        )
+        .await?;
     let object = &(&control_results[0]).contents[0];
     let (data, _) = bucket.get_object(&object.key).await.unwrap();
     let bytes = BytesMut::from(data.as_slice()).freeze();
@@ -131,10 +136,11 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
     }
 
     //Now add nonrelation files
-    let nonrelresults: Vec<s3::serde_types::ListBucketResult> = bucket.list("nonreldata/".to_string(), Some("".to_string())).await?;
+    let nonrelresults: Vec<s3::serde_types::ListBucketResult> = bucket
+        .list("nonreldata/".to_string(), Some("".to_string()))
+        .await?;
     for result in nonrelresults {
         for object in result.contents {
-
             // Download needed non relation files, slurping them into memory
 
             let key = object.key;
@@ -150,7 +156,9 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
 
                     slurp_futures.push(f);
                 }
-                Err(e) => { warn!("unrecognized file: {} ({})", relpath, e); }
+                Err(e) => {
+                    warn!("unrecognized file: {} ({})", relpath, e);
+                }
             };
         }
     }
@@ -160,12 +168,14 @@ async fn restore_chunk(conf: &PageServerConf) -> Result<(), S3Error> {
     info!("{} files to restore...", slurp_futures.len());
 
     future::join_all(slurp_futures).await;
-    info!("restored! {:?} to {:?}", pcache.first_valid_lsn, pcache.last_valid_lsn);
+    info!(
+        "restored! {:?} to {:?}",
+        pcache.first_valid_lsn, pcache.last_valid_lsn
+    );
 
     Ok(())
 }
 
 #[derive(Debug)]
 struct FilePathError {
     msg: String,
@@ -215,10 +225,8 @@ struct ParsedBaseImageFileName {
     pub lsn: u64,
 }
 
-fn parse_lsn_from_filename(fname: &str) -> Result<u64, FilePathError>
-{
-
-    let (_, lsn_str) = fname.split_at(fname.len()-16);
+fn parse_lsn_from_filename(fname: &str) -> Result<u64, FilePathError> {
+    let (_, lsn_str) = fname.split_at(fname.len() - 16);
 
     let (lsnhi, lsnlo) = lsn_str.split_at(8);
     let lsn_hi = u64::from_str_radix(lsnhi, 16)?;
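The arithmetic above implies the naming convention: a base-image file name ends in 16 hex digits, the high and low 32 bits of its LSN. The truncated hunk does not show how the two halves are recombined; presumably (lsn_hi << 32) | lsn_lo, as in this sketch (the file name is hypothetical):

    let fname = "pg_control_0000000101A2B3C4";
    let (_, lsn_str) = fname.split_at(fname.len() - 16);
    let (lsnhi, lsnlo) = lsn_str.split_at(8);
    let lsn = (u64::from_str_radix(lsnhi, 16).unwrap() << 32)
        | u64::from_str_radix(lsnlo, 16).unwrap();
    assert_eq!(lsn, 0x0000_0001_01A2_B3C4);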
@@ -267,10 +275,8 @@ fn parse_filename(fname: &str) -> Result<(u32, u32, u32, u64), FilePathError> {
 }
 
 fn parse_nonrel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathError> {
-
     //TODO parse segno from xact filenames too
     if let Some(fname) = path.strip_prefix("pg_xact/") {
-
         let lsn = parse_lsn_from_filename(fname.clone())?;
 
         return Ok(ParsedBaseImageFileName {
@@ -279,11 +285,9 @@ fn parse_nonrel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePat
             relnode: 0,
             forknum: pg_constants::PG_XACT_FORKNUM,
             segno: 0,
-            lsn
+            lsn,
         });
-    }
-    else if let Some(fname) = path.strip_prefix("pg_multixact/offsets") {
-
+    } else if let Some(fname) = path.strip_prefix("pg_multixact/offsets") {
         let lsn = parse_lsn_from_filename(fname.clone())?;
 
         return Ok(ParsedBaseImageFileName {
@@ -292,11 +296,9 @@ fn parse_nonrel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePat
             relnode: 0,
             forknum: pg_constants::PG_MXACT_OFFSETS_FORKNUM,
             segno: 0,
-            lsn
+            lsn,
         });
-    }
-    else if let Some(fname) = path.strip_prefix("pg_multixact/members") {
-
+    } else if let Some(fname) = path.strip_prefix("pg_multixact/members") {
         let lsn = parse_lsn_from_filename(fname.clone())?;
 
         return Ok(ParsedBaseImageFileName {
@@ -305,14 +307,11 @@ fn parse_nonrel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePat
             relnode: 0,
             forknum: pg_constants::PG_MXACT_MEMBERS_FORKNUM,
             segno: 0,
-            lsn
+            lsn,
         });
-
-    }
-    else {
+    } else {
         return Err(FilePathError::new("invalid non relation data file name"));
     }
-
 }
 
 fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathError> {
@@ -334,9 +333,7 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
      * <oid>.<segment number>
      */
     if let Some(fname) = path.strip_prefix("global/") {
-
-        if fname.contains("pg_control")
-        {
+        if fname.contains("pg_control") {
             let lsn = parse_lsn_from_filename(fname.clone())?;
 
             return Ok(ParsedBaseImageFileName {
@@ -345,12 +342,11 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
                 relnode: 0,
                 forknum: pg_constants::PG_CONTROLFILE_FORKNUM,
                 segno: 0,
-                lsn
+                lsn,
             });
         }
 
-        if fname.contains("pg_filenode")
-        {
+        if fname.contains("pg_filenode") {
             let lsn = parse_lsn_from_filename(fname.clone())?;
 
             return Ok(ParsedBaseImageFileName {
@@ -359,7 +355,7 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
                 relnode: 0,
                 forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
                 segno: 0,
-                lsn
+                lsn,
             });
         }
 
@@ -386,8 +382,7 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
             return Err(FilePathError::new("invalid relation data file name"));
         };
 
-        if fname.contains("pg_filenode")
-        {
+        if fname.contains("pg_filenode") {
             let lsn = parse_lsn_from_filename(fname.clone())?;
 
             return Ok(ParsedBaseImageFileName {
@@ -396,11 +391,10 @@ fn parse_rel_file_path(path: &str) -> Result<ParsedBaseImageFileName, FilePathEr
                 relnode: 0,
                 forknum: pg_constants::PG_FILENODEMAP_FORKNUM,
                 segno: 0,
-                lsn
+                lsn,
             });
         }
 
-
         let (relnode, forknum, segno, lsn) = parse_filename(fname)?;
 
         return Ok(ParsedBaseImageFileName {
@@ -441,18 +435,16 @@ async fn slurp_base_file(
     let pcache = page_cache::get_pagecache(conf.clone(), sys_id);
 
     // pg_filenode.map has non-standard size - 512 bytes
-    if parsed.forknum == pg_constants::PG_FILENODEMAP_FORKNUM
-    {
+    if parsed.forknum == pg_constants::PG_FILENODEMAP_FORKNUM {
         let b = bytes.clone();
         controlfile::decode_filemapping(b);
         while bytes.remaining() >= 512 {
-
             let tag = page_cache::BufferTag {
                 spcnode: parsed.spcnode,
                 dbnode: parsed.dbnode,
                 relnode: parsed.relnode,
                 forknum: parsed.forknum as u8,
-                blknum: 0
+                blknum: 0,
             };
 
             pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(512));
@@ -466,11 +458,9 @@ async fn slurp_base_file(
         };
 
         pcache.relsize_inc(&tag, Some(0));
-    }
-    else
-    {
+    } else {
         // FIXME: use constants (BLCKSZ)
-        let mut blknum: u32 = parsed.segno * (1024*1024*1024 / 8192);
+        let mut blknum: u32 = parsed.segno * (1024 * 1024 * 1024 / 8192);
         let reltag = page_cache::RelTag {
             spcnode: parsed.spcnode,
             dbnode: parsed.dbnode,
@@ -479,13 +469,12 @@ async fn slurp_base_file(
         };
 
         while bytes.remaining() >= 8192 {
-
             let tag = page_cache::BufferTag {
                 spcnode: parsed.spcnode,
                 dbnode: parsed.dbnode,
                 relnode: parsed.relnode,
                 forknum: parsed.forknum as u8,
-                blknum: blknum
+                blknum: blknum,
             };
 
             pcache.put_page_image(tag, parsed.lsn, bytes.copy_to_bytes(8192));
 
@@ -238,7 +238,7 @@ const BLCKSZ: u16 = 8192;
 //
 const XLR_INFO_MASK: u8 = 0x0F;
 
-const XLR_MAX_BLOCK_ID:u8 = 32;
+const XLR_MAX_BLOCK_ID: u8 = 32;
 
 const XLR_BLOCK_ID_DATA_SHORT: u8 = 255;
 const XLR_BLOCK_ID_DATA_LONG: u8 = 254;
@@ -260,8 +260,8 @@ const BKPIMAGE_APPLY: u8 = 0x04; /* page image should be restored during replay
 //
 // constants from clog.h
 //
-const CLOG_XACTS_PER_BYTE:u32 = 4;
-const CLOG_XACTS_PER_PAGE:u32 = 8192 * CLOG_XACTS_PER_BYTE;
+const CLOG_XACTS_PER_BYTE: u32 = 4;
+const CLOG_XACTS_PER_PAGE: u32 = 8192 * CLOG_XACTS_PER_BYTE;
 
 pub struct DecodedBkpBlock {
     /* Is this block ref in use? */
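A worked check of these constants (a sketch, not in the commit): each CLOG byte packs 4 two-bit transaction statuses, so one 8192-byte page covers 8192 * 4 = 32768 xids; decode_wal_record below uses exactly this to find a commit record's CLOG block.

    // Hypothetical: locate the CLOG page, byte, and slot for an xid.
    let xid: u32 = 100_000;
    let blkno = xid / CLOG_XACTS_PER_PAGE; // 100000 / 32768 = 3
    let byte_in_page = (xid % CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_BYTE;
    let slot_in_byte = xid % CLOG_XACTS_PER_BYTE;
    assert_eq!((blkno, byte_in_page, slot_in_byte), (3, 424, 0));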
@@ -307,7 +307,7 @@ pub struct DecodedWALRecord {
 const XLOG_SWITCH: u8 = 0x40;
 const RM_XLOG_ID: u8 = 0;
 
-const RM_XACT_ID:u8 = 1;
+const RM_XACT_ID: u8 = 1;
 // const RM_CLOG_ID:u8 = 3;
 //const RM_MULTIXACT_ID:u8 = 6;
 
@@ -327,7 +327,6 @@ const XLOG_XACT_OPMASK: u8 = 0x70;
 /* does this record have a 'xinfo' field or not */
 // const XLOG_XACT_HAS_INFO: u8 = 0x80;
 
-
 // Is this record an XLOG_SWITCH record? They need some special processing,
 // so we need to check for that before the rest of the parsing.
 //
@@ -369,9 +368,9 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
     buf.advance(2); // 2 bytes of padding
     let _xl_crc = buf.get_u32_le();
 
-    info!("decode_wal_record xl_rmid = {}" , xl_rmid);
+    info!("decode_wal_record xl_rmid = {}", xl_rmid);
 
-    let rminfo: u8 = xl_info & !XLR_INFO_MASK;
+    let rminfo: u8 = xl_info & !XLR_INFO_MASK;
 
     let remaining = xl_tot_len - SizeOfXLogRecord;
 
@@ -384,15 +383,15 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
     let mut rnode_relnode: u32 = 0;
     let mut got_rnode = false;
 
-    if xl_rmid == RM_XACT_ID &&
-        ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT ||
-         (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED)
+    if xl_rmid == RM_XACT_ID
+        && ((rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT
+            || (rminfo & XLOG_XACT_OPMASK) == XLOG_XACT_COMMIT_PREPARED)
     {
         info!("decode_wal_record RM_XACT_ID - XLOG_XACT_COMMIT");
 
         let mut blocks: Vec<DecodedBkpBlock> = Vec::new();
 
-        let blkno = xl_xid/CLOG_XACTS_PER_PAGE;
+        let blkno = xl_xid / CLOG_XACTS_PER_PAGE;
 
         let mut blk = DecodedBkpBlock {
             rnode_spcnode: 0,
@@ -411,21 +410,24 @@ pub fn decode_wal_record(lsn: u64, rec: Bytes) -> DecodedWALRecord {
             bimg_info: 0,
 
             has_data: true,
-            data_len: 0
+            data_len: 0,
         };
 
         let fork_flags = buf.get_u8();
         blk.has_data = (fork_flags & BKPBLOCK_HAS_DATA) != 0;
         blk.data_len = buf.get_u16_le();
 
-        info!("decode_wal_record RM_XACT_ID blk has data with data_len {}", blk.data_len);
+        info!(
+            "decode_wal_record RM_XACT_ID blk has data with data_len {}",
+            blk.data_len
+        );
 
         blocks.push(blk);
         return DecodedWALRecord {
             lsn: lsn,
             record: rec,
-            blocks: blocks
-        }
+            blocks: blocks,
+        };
     }
 
     // Decode the headers
 
@@ -160,9 +160,11 @@ impl WalRedoProcess {
             .expect("failed to execute initdb");
 
         if !initdb.status.success() {
-            panic!("initdb failed: {}\nstderr:\n{}",
-                   std::str::from_utf8(&initdb.stdout).unwrap(),
-                   std::str::from_utf8(&initdb.stderr).unwrap());
+            panic!(
+                "initdb failed: {}\nstderr:\n{}",
+                std::str::from_utf8(&initdb.stdout).unwrap(),
+                std::str::from_utf8(&initdb.stderr).unwrap()
+            );
         }
 
         // Start postgres itself
 