Compare commits

...

10 Commits

Author SHA1 Message Date
Konstantin Knizhnik
458ca82d75 Bump postgres version 2021-09-27 12:08:55 +03:00
Konstantin Knizhnik
0dface838d Bump postgres version 2021-09-24 19:21:16 +03:00
Konstantin Knizhnik
f7878c5157 Bump postgres version 2021-09-24 19:10:18 +03:00
Konstantin Knizhnik
53c870b8e7 Store file cache outside pgdata dir 2021-09-24 19:07:30 +03:00
Konstantin Knizhnik
c23b65914e Sete zenith.file_cache_size parameter 2021-09-24 18:19:41 +03:00
Konstantin Knizhnik
b51d3f6b2b Revert "Save page received from page server in local file cache"
This reverts commit 137472db91.
2021-09-24 18:04:21 +03:00
Konstantin Knizhnik
137472db91 Save page received from page server in local file cache 2021-09-24 15:15:54 +03:00
Konstantin Knizhnik
f817985f2b Do not throw error on attempt to drop unexisted relation 2021-09-23 12:11:06 +03:00
Konstantin Knizhnik
2e94e1428e Remove import_timeline_wal function 2021-09-23 12:11:06 +03:00
Konstantin Knizhnik
9a486ca109 Do not write WAL at pageserver 2021-09-23 12:11:06 +03:00
8 changed files with 39 additions and 257 deletions

View File

@@ -324,6 +324,8 @@ impl PostgresNode {
max_connections = 100\n\
wal_sender_timeout = 0\n\
wal_level = replica\n\
zenith.file_cache_size = 4096\n\
zenith.file_cache_path = '/tmp/file.cache'\n\
listen_addresses = '{address}'\n\
port = {port}\n",
address = self.address.ip(),

View File

@@ -222,10 +222,6 @@ fn bootstrap_timeline(
// LSN, and any WAL after that.
let timeline = repo.create_empty_timeline(tli)?;
restore_local_repo::import_timeline_from_postgres_datadir(&pgdata_path, &*timeline, lsn)?;
let wal_dir = pgdata_path.join("pg_wal");
restore_local_repo::import_timeline_wal(&wal_dir, &*timeline, lsn)?;
timeline.checkpoint()?;
println!(

View File

@@ -11,7 +11,7 @@
//! parent timeline, and the last LSN that has been written to disk.
//!
use anyhow::{anyhow, bail, Context, Result};
use anyhow::{bail, Context, Result};
use bookfile::Book;
use bytes::Bytes;
use lazy_static::lazy_static;
@@ -35,7 +35,6 @@ use crate::layered_repository::inmemory_layer::FreezeLayers;
use crate::relish::*;
use crate::relish_storage::storage_uploader::QueueBasedRelishUploader;
use crate::repository::{GcResult, Repository, Timeline, WALRecord};
use crate::restore_local_repo::import_timeline_wal;
use crate::walredo::WalRedoManager;
use crate::PageServerConf;
use crate::{ZTenantId, ZTimelineId};
@@ -253,11 +252,6 @@ impl LayeredRepository {
timelineid,
timeline.get_last_record_lsn()
);
let wal_dir = self
.conf
.timeline_path(&timelineid, &self.tenantid)
.join("wal");
import_timeline_wal(&wal_dir, timeline.as_ref(), timeline.get_last_record_lsn())?;
if cfg!(debug_assertions) {
// check again after wal loading
@@ -791,47 +785,39 @@ impl Timeline for LayeredTimeline {
debug!("put_truncation: {} to {} blocks at {}", rel, relsize, lsn);
let oldsize = self
.get_relish_size(rel, self.get_last_record_lsn())?
.ok_or_else(|| {
anyhow!(
"attempted to truncate non-existent relish {} at {}",
if let Some(oldsize) = self.get_relish_size(rel, self.get_last_record_lsn())? {
if oldsize <= relsize {
return Ok(());
}
let old_last_seg = (oldsize - 1) / RELISH_SEG_SIZE;
let last_remain_seg = if relsize == 0 {
0
} else {
(relsize - 1) / RELISH_SEG_SIZE
};
// Drop segments beyond the last remaining segment.
for remove_segno in (last_remain_seg + 1)..=old_last_seg {
let seg = SegmentTag {
rel,
lsn
)
})?;
segno: remove_segno,
};
self.perform_write_op(seg, lsn, |layer| layer.drop_segment(lsn))?;
}
if oldsize <= relsize {
return Ok(());
// Truncate the last remaining segment to the specified size
if relsize == 0 || relsize % RELISH_SEG_SIZE != 0 {
let seg = SegmentTag {
rel,
segno: last_remain_seg,
};
self.perform_write_op(seg, lsn, |layer| {
layer.put_truncation(lsn, relsize % RELISH_SEG_SIZE)
})?;
}
self.decrease_current_logical_size((oldsize - relsize) * BLCKSZ as u32);
}
let old_last_seg = (oldsize - 1) / RELISH_SEG_SIZE;
let last_remain_seg = if relsize == 0 {
0
} else {
(relsize - 1) / RELISH_SEG_SIZE
};
// Drop segments beyond the last remaining segment.
for remove_segno in (last_remain_seg + 1)..=old_last_seg {
let seg = SegmentTag {
rel,
segno: remove_segno,
};
self.perform_write_op(seg, lsn, |layer| layer.drop_segment(lsn))?;
}
// Truncate the last remaining segment to the specified size
if relsize == 0 || relsize % RELISH_SEG_SIZE != 0 {
let seg = SegmentTag {
rel,
segno: last_remain_seg,
};
self.perform_write_op(seg, lsn, |layer| {
layer.put_truncation(lsn, relsize % RELISH_SEG_SIZE)
})?;
}
self.decrease_current_logical_size((oldsize - relsize) * BLCKSZ as u32);
Ok(())
}

View File

@@ -32,8 +32,8 @@ pub mod defaults {
// FIXME: This current value is very low. I would imagine something like 1 GB or 10 GB
// would be more appropriate. But a low value forces the code to be exercised more,
// which is good for now to trigger bugs.
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 64 * 1024 * 1024;
pub const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(100);
pub const DEFAULT_CHECKPOINT_DISTANCE: u64 = 256 * 1024 * 1024;
pub const DEFAULT_CHECKPOINT_PERIOD: Duration = Duration::from_secs(1);
pub const DEFAULT_GC_HORIZON: u64 = 64 * 1024 * 1024;
pub const DEFAULT_GC_PERIOD: Duration = Duration::from_secs(100);

View File

@@ -9,9 +9,7 @@ use std::cmp::min;
use std::fs;
use std::fs::File;
use std::io::Read;
use std::io::Seek;
use std::io::SeekFrom;
use std::path::{Path, PathBuf};
use std::path::Path;
use anyhow::Result;
use bytes::{Buf, Bytes};
@@ -283,100 +281,6 @@ fn import_slru_file(timeline: &dyn Timeline, lsn: Lsn, slru: SlruKind, path: &Pa
Ok(())
}
/// Scan PostgreSQL WAL files in given directory
/// and load all records >= 'startpoint' into the repository.
pub fn import_timeline_wal(walpath: &Path, timeline: &dyn Timeline, startpoint: Lsn) -> Result<()> {
let mut waldecoder = WalStreamDecoder::new(startpoint);
let mut segno = startpoint.segment_number(pg_constants::WAL_SEGMENT_SIZE);
let mut offset = startpoint.segment_offset(pg_constants::WAL_SEGMENT_SIZE);
let mut last_lsn = startpoint;
let checkpoint_bytes = timeline.get_page_at_lsn(RelishTag::Checkpoint, 0, startpoint)?;
let mut checkpoint = CheckPoint::decode(&checkpoint_bytes)?;
loop {
// FIXME: assume postgresql tli 1 for now
let filename = XLogFileName(1, segno, pg_constants::WAL_SEGMENT_SIZE);
let mut buf = Vec::new();
//Read local file
let mut path = walpath.join(&filename);
// It could be as .partial
if !PathBuf::from(&path).exists() {
path = walpath.join(filename + ".partial");
}
// Slurp the WAL file
let open_result = File::open(&path);
if let Err(e) = &open_result {
if e.kind() == std::io::ErrorKind::NotFound {
break;
}
}
let mut file = open_result?;
if offset > 0 {
file.seek(SeekFrom::Start(offset as u64))?;
}
let nread = file.read_to_end(&mut buf)?;
if nread != pg_constants::WAL_SEGMENT_SIZE - offset as usize {
// Maybe allow this for .partial files?
error!("read only {} bytes from WAL file", nread);
}
waldecoder.feed_bytes(&buf);
let mut nrecords = 0;
loop {
let rec = waldecoder.poll_decode();
if rec.is_err() {
// Assume that an error means we've reached the end of
// a partial WAL record. So that's ok.
trace!("WAL decoder error {:?}", rec);
break;
}
if let Some((lsn, recdata)) = rec.unwrap() {
// The previous record has been handled, let the repository know that
// it is up-to-date to this LSN. (We do this here on the "next" iteration,
// rather than right after the save_decoded_record, because at the end of
// the WAL, we will also need to perform the update of the checkpoint data
// with the same LSN as the last actual record.)
timeline.advance_last_record_lsn(last_lsn);
let decoded = decode_wal_record(recdata.clone());
save_decoded_record(&mut checkpoint, timeline, &decoded, recdata, lsn)?;
last_lsn = lsn;
} else {
break;
}
nrecords += 1;
}
info!("imported {} records up to {}", nrecords, last_lsn);
segno += 1;
offset = 0;
}
if last_lsn != startpoint {
info!(
"reached end of WAL at {}, updating checkpoint info",
last_lsn
);
let checkpoint_bytes = checkpoint.encode();
timeline.put_page_image(RelishTag::Checkpoint, 0, last_lsn, checkpoint_bytes)?;
timeline.advance_last_record_lsn(last_lsn);
} else {
info!("no WAL to import at {}", last_lsn);
}
Ok(())
}
///
/// Helper function to parse a WAL record and call the Timeline's PUT functions for all the
/// relations/pages that the record affects.

View File

@@ -23,8 +23,6 @@ use postgres_types::PgLsn;
use std::cmp::{max, min};
use std::collections::HashMap;
use std::fs;
use std::fs::{File, OpenOptions};
use std::io::{Seek, SeekFrom, Write};
use std::str::FromStr;
use std::sync::Mutex;
use std::thread;
@@ -192,15 +190,6 @@ fn walreceiver_main(
let endlsn = startlsn + data.len() as u64;
let prev_last_rec_lsn = last_rec_lsn;
write_wal_file(
conf,
startlsn,
&timelineid,
pg_constants::WAL_SEGMENT_SIZE,
data,
&tenantid,
)?;
trace!("received XLogData between {} and {}", startlsn, endlsn);
waldecoder.feed_bytes(data);
@@ -403,98 +392,3 @@ pub fn identify_system(client: &mut Client) -> Result<IdentifySystem, Error> {
Err(IdentifyError.into())
}
}
fn write_wal_file(
conf: &PageServerConf,
startpos: Lsn,
timelineid: &ZTimelineId,
wal_seg_size: usize,
buf: &[u8],
tenantid: &ZTenantId,
) -> anyhow::Result<()> {
let mut bytes_left: usize = buf.len();
let mut bytes_written: usize = 0;
let mut partial;
let mut start_pos = startpos;
const ZERO_BLOCK: &[u8] = &[0u8; XLOG_BLCKSZ];
let wal_dir = conf.wal_dir_path(timelineid, tenantid);
/* Extract WAL location for this block */
let mut xlogoff = start_pos.segment_offset(wal_seg_size);
while bytes_left != 0 {
let bytes_to_write;
/*
* If crossing a WAL boundary, only write up until we reach wal
* segment size.
*/
if xlogoff + bytes_left > wal_seg_size {
bytes_to_write = wal_seg_size - xlogoff;
} else {
bytes_to_write = bytes_left;
}
/* Open file */
let segno = start_pos.segment_number(wal_seg_size);
let wal_file_name = XLogFileName(
1, // FIXME: always use Postgres timeline 1
segno,
wal_seg_size,
);
let wal_file_path = wal_dir.join(wal_file_name.clone());
let wal_file_partial_path = wal_dir.join(wal_file_name.clone() + ".partial");
{
let mut wal_file: File;
/* Try to open already completed segment */
if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_path) {
wal_file = file;
partial = false;
} else if let Ok(file) = OpenOptions::new().write(true).open(&wal_file_partial_path) {
/* Try to open existed partial file */
wal_file = file;
partial = true;
} else {
/* Create and fill new partial file */
partial = true;
match OpenOptions::new()
.create(true)
.write(true)
.open(&wal_file_partial_path)
{
Ok(mut file) => {
for _ in 0..(wal_seg_size / XLOG_BLCKSZ) {
file.write_all(ZERO_BLOCK)?;
}
wal_file = file;
}
Err(e) => {
error!("Failed to open log file {:?}: {}", &wal_file_path, e);
return Err(e.into());
}
}
}
wal_file.seek(SeekFrom::Start(xlogoff as u64))?;
wal_file.write_all(&buf[bytes_written..(bytes_written + bytes_to_write)])?;
// FIXME: Flush the file
//wal_file.sync_all()?;
}
/* Write was successful, advance our position */
bytes_written += bytes_to_write;
bytes_left -= bytes_to_write;
start_pos += bytes_to_write as u64;
xlogoff += bytes_to_write;
/* Did we reach the end of a WAL segment? */
if start_pos.segment_offset(wal_seg_size) == 0 {
xlogoff = 0;
if partial {
fs::rename(&wal_file_partial_path, &wal_file_path)?;
}
}
}
Ok(())
}

View File

@@ -12,5 +12,5 @@ def test_pgbench(postgres: PostgresFactory, pg_bin, zenith_cli):
connstr = pg.connstr()
pg_bin.run_capture(['pgbench', '-i', connstr])
pg_bin.run_capture(['pgbench'] + '-c 10 -T 5 -P 1 -M prepared'.split() + [connstr])
pg_bin.run_capture(['pgbench', '-i', '-s', '100', connstr])
pg_bin.run_capture(['pgbench'] + '-c 1 -N -T 100 -P 1 -M prepared'.split() + [connstr])